ONNX Runtime C++ code collection


1、EfficientDet

EfficientDet.h

#include <core/session/onnxruntime_cxx_api.h>
#include <cstring>
#include <array>
#include <string>
#include <vector>
#include <cmath>
#include <algorithm>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "providers.h"
 
class EfficientDetOnnxRT {
 public:
 
  EfficientDetOnnxRT(std::string onnx_file,unsigned int numClasses); 
  
  ~EfficientDetOnnxRT(); 
 
  ...
  
  bool processInput(float* buffer,cv::Mat& cv_img_origin); 
 
  std::vector<std::vector<float>> processOutput(float* regressionOutputBuffer,float* classificationOutputBuffer,
                 float* anchorsOutputBuffer,float nms_threshold, float score_threshold); 
 
  std::vector<std::vector<float>> infer(cv::Mat& cv_img, float nms_threshold, float score_threshold ); 
 
 
 private:
  ...
 
  char* input_names[1]{nullptr};
  float* input_image_{nullptr};
  Ort::Value input_tensor_{(Ort::Value)nullptr};
  std::array<int64_t, 4> input_shape_;
 
  Ort::Env env_ {ORT_LOGGING_LEVEL_ERROR, "EfficientDetOnnxEnv"};  
  Ort::Session session_ {nullptr};
 
  char* output_names[3]{nullptr,nullptr,nullptr};
  Ort::Value output_tensor_[3]{(Ort::Value)nullptr,(Ort::Value)nullptr,(Ort::Value)nullptr};
  std::array<int64_t,3> output_shape_regression;     
  std::array<int64_t,3> output_shape_classification; 
  std::array<int64_t,3> output_shape_anchors;        
  float* results_regression{nullptr};    
  float* results_classification{nullptr}; 
  float* results_anchors{nullptr};       
 
  ...
 
};

EfficientDet.cpp

EfficientDetOnnxRT::EfficientDetOnnxRT(std::string onnx_file,unsigned int numClasses) {
 
    Ort::SessionOptions op;
    op.SetLogSeverityLevel(ORT_LOGGING_LEVEL_ERROR);
    int device_id = 0;
    
    std::cout <<"onnxruntime loading onnx model..." <<std::endl;
#if defined(USE_TENSORRT)
      std::cout <<"onnxruntime running with TensorRT..." <<std::endl;
      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(op, device_id));
      
#elif defined(USE_CUDA)
      OrtCUDAProviderOptions cuda_options{
          0,
          OrtCudnnConvAlgoSearch::EXHAUSTIVE,
          std::numeric_limits<size_t>::max(),
          0,
          true
      };
      op.AppendExecutionProvider_CUDA(cuda_options);
#endif
    session_ = Ort::Session(env_, onnx_file.c_str(), op);
 
    Ort::AllocatorWithDefaultOptions ort_alloc;
    char* tmp = session_.GetInputName(0, ort_alloc);
    input_names[0] = strdup(tmp);
    ort_alloc.Free(tmp);
    tmp = session_.GetOutputName(0, ort_alloc);
    output_names[0] = strdup(tmp);
    ort_alloc.Free(tmp);
    tmp = session_.GetOutputName(1, ort_alloc);
    output_names[1] = strdup(tmp);
    ort_alloc.Free(tmp);
    tmp = session_.GetOutputName(2, ort_alloc);
    output_names[2] = strdup(tmp);
    ort_alloc.Free(tmp);
   
    Ort::TypeInfo info = session_.GetInputTypeInfo(0); 
    auto tensor_info = info.GetTensorTypeAndShapeInfo();
    size_t dim_count = tensor_info.GetDimensionsCount();
    std::vector<int64_t> dims(dim_count);
    tensor_info.GetDimensions(dims.data(), dims.size());
    channels_ = dims[1];
    height_ = dims[2];
    width_  = dims[3];
    input_shape_[0]= dims[0];
    input_shape_[1]= channels_;
    input_shape_[2]= height_; 
    input_shape_[3]= width_;
 
 
    info = session_.GetOutputTypeInfo(0); 
    auto tensor_info2 = info.GetTensorTypeAndShapeInfo();
    dim_count = tensor_info2.GetDimensionsCount();
    dims.clear();
    dims.resize(dim_count);
    tensor_info2.GetDimensions(dims.data(), dims.size());
    for (int i=0; i< dims.size();i++)
      output_shape_regression[i] = dims[i];
 
  
    info = session_.GetOutputTypeInfo(1); 
    auto tensor_info3 = info.GetTensorTypeAndShapeInfo();
    dim_count = tensor_info3.GetDimensionsCount();
    dims.clear();
    dims.resize(dim_count);
    tensor_info3.GetDimensions(dims.data(), dims.size());
    for (int i=0; i< dims.size();i++)
      output_shape_classification[i] = dims[i];
 
    numClassScores_ = static_cast<unsigned int>(dims[2]);
    output_shape_classification[2] = numClassScores_;
    ...
 
    info = session_.GetOutputTypeInfo(2); 
    auto tensor_info4 = info.GetTensorTypeAndShapeInfo();
    dim_count = tensor_info4.GetDimensionsCount();
    dims.clear();
    dims.resize(dim_count);
    tensor_info4.GetDimensions(dims.data(), dims.size());
    for (int i=0; i< dims.size();i++)
      output_shape_anchors[i] = dims[i];
    ... 
    
    int size_anchors = dims[1] * dims[2];
    int size_classification = dims[1] * numClassScores_;   // one score per class per anchor
    results_regression = new float[size_anchors];
    results_classification = new float[size_classification];
    results_anchors = new float[size_anchors];  
 
    int size_image_data = channels_ * width_ * height_;
    input_image_ = new float[size_image_data];
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    input_tensor_ = Ort::Value::CreateTensor<float>(memory_info, input_image_, size_image_data, input_shape_.data(), input_shape_.size());
    output_tensor_[0] = Ort::Value::CreateTensor<float>(memory_info, results_regression, size_anchors, 
                                       output_shape_regression.data(), output_shape_regression.size());
    output_tensor_[1] = Ort::Value::CreateTensor<float>(memory_info, results_classification, size_classification,
                                       output_shape_classification.data(), output_shape_classification.size());
    output_tensor_[2] = Ort::Value::CreateTensor<float>(memory_info, results_anchors, size_anchors,
                                       output_shape_anchors.data(), output_shape_anchors.size());
  }
 
...
 
std::vector<std::vector<float>> EfficientDetOnnxRT::infer(cv::Mat& cv_img, float nms_threshold, float score_threshold ) {
    memset(input_image_,0, channels_ * height_ * width_ * sizeof(float));
    processInput(input_image_,cv_img);   
 
    session_.Run(Ort::RunOptions{nullptr}, &input_names[0], &input_tensor_, 1, &output_names[0], &output_tensor_[0], 3); 
    return processOutput(results_regression, results_classification, results_anchors, nms_threshold, score_threshold);
  }
 
...
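A minimal usage sketch of the class above (not from the original article; the model path, class count and thresholds are placeholder assumptions):

#include "EfficientDet.h"
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main() {
    // Hypothetical model path and class count, for illustration only.
    EfficientDetOnnxRT detector("efficientdet-d0.onnx", 90);

    cv::Mat img = cv::imread("test.jpg");
    // infer() fills the input tensor, runs the session and decodes the three
    // output buffers; each returned row is one detection (the exact layout
    // depends on the elided processOutput implementation).
    std::vector<std::vector<float>> dets = detector.infer(img, /*nms_threshold=*/0.5f, /*score_threshold=*/0.3f);
    std::cout << "detections: " << dets.size() << std::endl;
    return 0;
}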

Reference: onnxruntime调用AI模型的python和C++编程 (Python and C++ programming for running AI models with onnxruntime)

2、squeezenet

// https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/CXX_Api_Sample.cpp
// https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_cxx_api.h
#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (int i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

/**
 * @brief Print ONNX tensor data type
 * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
 * @param os
 * @param type
 * @return std::ostream&
 */
std::ostream& operator<<(std::ostream& os,
                         const ONNXTensorElementDataType& type)
{
    switch (type)
    {
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
            os << "undefined";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
            os << "float";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
            os << "uint8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
            os << "int8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
            os << "uint16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
            os << "int16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
            os << "int32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
            os << "int64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
            os << "std::string";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
            os << "bool";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
            os << "float16";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
            os << "double";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
            os << "uint32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
            os << "uint64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
            os << "float real + float imaginary";
            break;
        case ONNXTensorElementDataType::
            ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
            os << "double real + float imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
            os << "bfloat16";
            break;
        default:
            break;
    }

    return os;
}

std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

int main(int argc, char* argv[])
{
    bool useCUDA{true};
    const char* useCUDAFlag = "--use_cuda";
    const char* useCPUFlag = "--use_cpu";
    if (argc == 1)
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) == 0))
    {
        useCUDA = true;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCPUFlag) == 0))
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) != 0))
    {
        useCUDA = false;
    }
    else
    {
        throw std::runtime_error{"Too many arguments."};
    }

    if (useCUDA)
    {
        std::cout << "Inference Execution Provider: CUDA" << std::endl;
    }
    else
    {
        std::cout << "Inference Execution Provider: CPU" << std::endl;
    }

    std::string instanceName{"image-classification-inference"};
    std::string modelFilepath{"../../data/models/squeezenet1.1-7.onnx"};
    std::string imageFilepath{
        "../../data/images/european-bee-eater-2115564_1920.jpg"};
    std::string labelFilepath{"../../data/labels/synset.txt"};

    std::vector<std::string> labels{readLabels(labelFilepath)};

    // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L123
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                 instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);
    if (useCUDA)
    {
        // Using CUDA backend
        // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h#L13
        OrtStatus* status =
            OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
    }

    // Sets graph optimization level
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
    // (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;

    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // cv::Size is (width, height) = (dims[3], dims[2]); keep fx/fy at 0 so
    // INTER_CUBIC is passed as the interpolation flag rather than as fx.
    cv::resize(imageBGR, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)), 0, 0,
               cv::InterpolationFlags::INTER_CUBIC);
    cv::cvtColor(resizedImageBGR, resizedImageRGB,
                 cv::ColorConversionCodes::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    // Normalization per channel
    // Normalization parameters obtained from
    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;
    cv::merge(channels, 3, resizedImage);
    // HWC to CHW
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);

    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    assert(("Output tensor size should equal to the label set size.",
            labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    std::vector<Ort::Value> inputTensors;
    std::vector<Ort::Value> outputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));
    outputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L353
    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);

    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    for (int i = 0; i < labels.size(); i++)
    {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum
              << std::endl;

    // Measure latency
    int numTests{100};
    std::chrono::steady_clock::time_point begin =
        std::chrono::steady_clock::now();
    for (int i = 0; i < numTests; i++)
    {
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
    }
    std::chrono::steady_clock::time_point end =
        std::chrono::steady_clock::now();
    std::cout << "Minimum Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end -
                                                                       begin)
                         .count() /
                     static_cast<float>(numTests)
              << " ms" << std::endl;
}

Reference: https://github.com/leimao/ONNX-Runtime-Inference

3、squeezenet

// Copyright(c) Microsoft Corporation.All rights reserved.
// Licensed under the MIT License.
//

#include <assert.h>
#include <cmath>
#include <cstdio>
#include <vector>
#include <onnxruntime_cxx_api.h>

int main(int argc, char* argv[]) {
  //*************************************************************************
  // initialize environment...one environment per process
  // environment maintains thread pools and other state info
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");

  // initialize session options if needed
  Ort::SessionOptions session_options;
  session_options.SetIntraOpNumThreads(1);

  // If onnxruntime.dll is built with CUDA enabled, we can uncomment out this line to use CUDA for this
  // session (we also need to include cuda_provider_factory.h above which defines it)
  // #include "cuda_provider_factory.h"
  // OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 1);

  // Sets graph optimization level
  // Available levels are
  // ORT_DISABLE_ALL -> To disable all optimizations
  // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
  // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
  // ORT_ENABLE_ALL -> To Enable All possible optimizations
  session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

  //*************************************************************************
  // create session and load model into memory
  // using squeezenet version 1.3
  // URL = https://github.com/onnx/models/tree/master/squeezenet
#ifdef _WIN32
  const wchar_t* model_path = L"squeezenet.onnx";
#else
  const char* model_path = "squeezenet.onnx";
#endif

  printf("Using Onnxruntime C++ API\n");
  Ort::Session session(env, model_path, session_options);

  //*************************************************************************
  // print model input layer (node names, types, shape etc.)
  Ort::AllocatorWithDefaultOptions allocator;

  // print number of model input nodes
  size_t num_input_nodes = session.GetInputCount();
  std::vector<const char*> input_node_names(num_input_nodes);
  std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224}.
                                         // Otherwise need vector<vector<>>

  printf("Number of inputs = %zu\n", num_input_nodes);

  // iterate over all input nodes
  for (int i = 0; i < num_input_nodes; i++) {
    // print input node names
    char* input_name = session.GetInputName(i, allocator);
    printf("Input %d : name=%s\n", i, input_name);
    input_node_names[i] = input_name;

    // print input node types
    Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType type = tensor_info.GetElementType();
    printf("Input %d : type=%d\n", i, type);

    // print input shapes/dims
    input_node_dims = tensor_info.GetShape();
    printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
    for (int j = 0; j < input_node_dims.size(); j++)
      printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
  }

  // Results should be...
  // Number of inputs = 1
  // Input 0 : name = data_0
  // Input 0 : type = 1
  // Input 0 : num_dims = 4
  // Input 0 : dim 0 = 1
  // Input 0 : dim 1 = 3
  // Input 0 : dim 2 = 224
  // Input 0 : dim 3 = 224

  //*************************************************************************
  // Similar operations to get output node information.
  // Use OrtSessionGetOutputCount(), OrtSessionGetOutputName()
  // OrtSessionGetOutputTypeInfo() as shown above.
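
  // (Illustrative sketch, not in the original sample.) The same pattern as the
  // input loop above, applied to the outputs via the C++ wrappers:
  size_t num_output_nodes = session.GetOutputCount();
  for (int i = 0; i < num_output_nodes; i++) {
    char* output_name = session.GetOutputName(i, allocator);
    printf("Output %d : name=%s\n", i, output_name);

    Ort::TypeInfo output_type_info = session.GetOutputTypeInfo(i);
    auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
    std::vector<int64_t> output_dims = output_tensor_info.GetShape();
    printf("Output %d : num_dims=%zu\n", i, output_dims.size());
    for (int j = 0; j < output_dims.size(); j++)
      printf("Output %d : dim %d=%jd\n", i, j, output_dims[j]);
  }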

  //*************************************************************************
  // Score the model using sample data, and inspect values

  size_t input_tensor_size = 224 * 224 * 3;  // simplify ... using known dim values to calculate size
                                             // use OrtGetTensorShapeElementCount() to get official size!

  std::vector<float> input_tensor_values(input_tensor_size);
  std::vector<const char*> output_node_names = {"softmaxout_1"};

  // initialize input data with values in [0.0, 1.0]
  for (unsigned int i = 0; i < input_tensor_size; i++)
    input_tensor_values[i] = (float)i / (input_tensor_size + 1);

  // create input tensor object from data values
  auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
  assert(input_tensor.IsTensor());

  // score model & input tensor, get back output tensor
  auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
  assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());

  // Get pointer to output tensor float values
  float* floatarr = output_tensors.front().GetTensorMutableData<float>();
  assert(std::abs(floatarr[0] - 0.000045) < 1e-6);

  // score the model, and print scores for first 5 classes
  for (int i = 0; i < 5; i++)
    printf("Score for class [%d] =  %f\n", i, floatarr[i]);

  // Results should be as below...
  // Score for class[0] = 0.000045
  // Score for class[1] = 0.003846
  // Score for class[2] = 0.000125
  // Score for class[3] = 0.001180
  // Score for class[4] = 0.001317
  printf("Done!\n");
  return 0;
}
/*
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
Portions of this software are copyright of their respective authors and released under the MIT license:
- ONNX-Runtime-Inference, Copyright 2020 Lei Mao. For licensing see https://github.com/leimao/ONNX-Runtime-Inference/blob/main/LICENSE.md
*/

#include <onnxruntime_cxx_api.h>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>
#include <stdexcept> // To use runtime_error

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */

template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (int i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

// Function to validate the input image file extension.
bool imageFileExtension(std::string str)
{
  // is empty throw error
  if (str.empty())
    throw std::runtime_error("[ ERROR ] The image File path is empty");

  size_t pos = str.rfind('.');
  if (pos == std::string::npos)
    return false;

  std::string ext = str.substr(pos+1);

  if (ext == "jpg" || ext == "jpeg" || ext == "gif" || ext == "png" || ext == "jfif" || 
        ext == "JPG" || ext == "JPEG" || ext == "GIF" || ext == "PNG" || ext == "JFIF") {
            return true;
  }

  return false;
}

// Function to read the labels from the labelFilepath.
std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

// Function to validate the input model file extension.
bool checkModelExtension(const std::string& filename)
{
    if(filename.empty())
    {
        throw std::runtime_error("[ ERROR ] The Model file path is empty");
    }
    size_t pos = filename.rfind('.');
    if (pos == std::string::npos)
        return false;
    std::string ext = filename.substr(pos+1);
    if (ext == "onnx")
        return true;
    return false;
}

// Function to validate the Label file extension.
bool checkLabelFileExtension(const std::string& filename)
{
    size_t pos = filename.rfind('.');
    if (filename.empty())
    {
        throw std::runtime_error("[ ERROR ] The Label file path is empty");
    }
    if (pos == std::string::npos)
        return false;
    std::string ext = filename.substr(pos+1);
    if (ext == "txt") {
        return true;
    } else {
        return false;
    }
}

//Handling divide by zero
float division(float num, float den){
   if (den == 0) {
      throw std::runtime_error("[ ERROR ] Math error: Attempted to divide by Zero\n");
   }
   return (num / den);
}

void printHelp() {
    std::cout << "To run the model, use the following command:\n";
    std::cout << "Example: ./run_squeezenet --use_openvino <path_to_the_model> <path_to_the_image> <path_to_the_classes_file>" << std::endl;
    std::cout << "\n To Run using OpenVINO EP.\nExample: ./run_squeezenet --use_openvino squeezenet1.1-7.onnx demo.jpeg synset.txt \n" << std::endl;
    std::cout << "\n To Run on Default CPU.\n Example: ./run_squeezenet --use_cpu squeezenet1.1-7.onnx demo.jpeg synset.txt \n" << std::endl;
}

int main(int argc, char* argv[])
{
    bool useOPENVINO{true};
    const char* useOPENVINOFlag = "--use_openvino";
    const char* useCPUFlag = "--use_cpu";

    if(argc == 2) {
        std::string option = argv[1];
        if (option == "--help" || option == "-help" || option == "--h" || option == "-h") {
            printHelp();
        }
        return 0;
    } else if(argc != 5) {
        std::cout << "[ ERROR ] you have used the wrong command to run your program." << std::endl;
        printHelp();
        return 0;
    } else if (strcmp(argv[1], useOPENVINOFlag) == 0) {
        useOPENVINO = true;
    } else if (strcmp(argv[1], useCPUFlag) == 0) {
        useOPENVINO = false;
    }

    if (useOPENVINO)
    {
        std::cout << "Inference Execution Provider: OPENVINO" << std::endl;
    }
    else
    {
        std::cout << "Inference Execution Provider: CPU" << std::endl;
    }

    std::string instanceName{"image-classification-inference"};

    std::string modelFilepath = argv[2]; // .onnx file

    //validate ModelFilePath
    if(!checkModelExtension(modelFilepath)) {
        throw std::runtime_error("[ ERROR ] The ModelFilepath is not correct. Make sure you are setting the path to an onnx model file (.onnx)");
    }
    std::string imageFilepath = argv[3];

    // Validate ImageFilePath
    if(!imageFileExtension(imageFilepath)) {
        throw std::runtime_error("[ ERROR ] The imageFilepath doesn't have correct image extension. Choose from jpeg, jpg, gif, png, PNG, jfif");
    }
    std::ifstream f(imageFilepath.c_str());
    if(!f.good()) {
        throw std::runtime_error("[ ERROR ] The imageFilepath is not set correctly or doesn't exist");
    }

    // Validate LabelFilePath
    std::string labelFilepath = argv[4];
    if(!checkLabelFileExtension(labelFilepath)) {
        throw std::runtime_error("[ ERROR ] The LabelFilepath is not set correctly and the labels file should end with extension .txt");
    }

    std::vector<std::string> labels{readLabels(labelFilepath)};

    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                 instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);

    //Appending OpenVINO Execution Provider API
    if (useOPENVINO) {
        // Using OPENVINO backend
        OrtOpenVINOProviderOptions options;
        options.device_type = "CPU_FP32"; //Other options are: GPU_FP32, GPU_FP16, MYRIAD_FP16
        std::cout << "OpenVINO device type is set to: " << options.device_type << std::endl;
        sessionOptions.AppendExecutionProvider_OpenVINO(options);
    }
    
    // Sets graph optimization level
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
    // (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_DISABLE_ALL);

    //Creation: The Ort::Session is created here
    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;
    //pre-processing the Image
    // step 1: Read an image in HWC BGR UINT8 format.
    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);

    // step 2: Resize the image.
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // cv::Size is (width, height) = (dims[3], dims[2]); keep fx/fy at 0 so
    // INTER_CUBIC is passed as the interpolation flag rather than as fx.
    cv::resize(imageBGR, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)), 0, 0,
               cv::InterpolationFlags::INTER_CUBIC);

    // step 3: Convert the image to HWC RGB UINT8 format.
    cv::cvtColor(resizedImageBGR, resizedImageRGB,
                 cv::ColorConversionCodes::COLOR_BGR2RGB);
    // step 4: Convert the image to HWC RGB float format by dividing each pixel by 255.
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    // step 5: Split the RGB channels from the image.   
    cv::Mat channels[3];
    cv::split(resizedImage, channels);

    //step 6: Normalize each channel.
    // Normalization per channel
    // Normalization parameters obtained from
    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;

    //step 7: Merge the RGB channels back to the image.
    cv::merge(channels, 3, resizedImage);

    // step 8: Convert the image to CHW RGB float format.
    // HWC to CHW
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);


    //Run Inference

    /* To run inference using ONNX Runtime, the user is responsible for creating and managing the 
    input and output buffers. These buffers could be created and managed via std::vector.
    The linear-format input data should be copied to the buffer for ONNX Runtime inference. */

    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    assert(("Output tensor size should equal to the label set size.",
            labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);


    /* Once the buffers were created, they would be used for creating instances of Ort::Value 
    which is the tensor format for ONNX Runtime. There could be multiple inputs for a neural network, 
    so we have to prepare an array of Ort::Value instances for inputs and outputs respectively even if 
    we only have one input and one output. */

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    std::vector<Ort::Value> inputTensors;
    std::vector<Ort::Value> outputTensors;

    /*
    Creating ONNX Runtime inference sessions, querying input and output names, 
    dimensions, and types are trivial.
    Setup inputs & outputs: The input & output tensors are created here. */

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));
    outputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    /* To run inference, we provide the run options, an array of input names corresponding to the 
    inputs in the input tensor, an array of input tensor, number of inputs, an array of output names 
    corresponding to the the outputs in the output tensor, an array of output tensor, number of outputs. */

    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);

    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    /* The inference result could be found in the buffer for the output tensors, 
    which are usually the buffer from std::vector instances. */
    for (int i = 0; i < labels.size(); i++) {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    float result;
    try {
      result = division(std::exp(maxActivation), expSum);
      std::cout << "Uncalibrated Confidence: " << result << std::endl;
    }
    catch (std::runtime_error& e) {
      std::cout << "Exception occurred" << std::endl << e.what();
    }

    // Measure latency
    int numTests{100};
    std::chrono::steady_clock::time_point begin =
        std::chrono::steady_clock::now();

    //Run: Running the session is done in the Run() method:
    for (int i = 0; i < numTests; i++) {
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
    }
    std::chrono::steady_clock::time_point end =
        std::chrono::steady_clock::now();
    std::cout << "Minimum Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
              << " ms" << std::endl;
    return 0;
}

Reference: microsoft/onnxruntime

4、squeezenet

#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

#define ONNX_MODEL_PATH "../../data/squeezenet1.1-7.onnx"
#define IMG_PATH "../../data/european-bee-eater-2115564_1920.jpg"
#define LABEL_PATH "../../data/synset.txt"

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (int i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

/**
 * @brief Print ONNX tensor data type
 * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
 * @param os
 * @param type
 * @return std::ostream&
 */
std::ostream& operator<<(std::ostream& os,
                         const ONNXTensorElementDataType& type)
{
    switch (type)
    {
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
            os << "undefined";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
            os << "float";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
            os << "uint8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
            os << "int8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
            os << "uint16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
            os << "int16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
            os << "int32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
            os << "int64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
            os << "std::string";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
            os << "bool";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
            os << "float16";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
            os << "double";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
            os << "uint32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
            os << "uint64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
            os << "float real + float imaginary";
            break;
        case ONNXTensorElementDataType::
            ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
            os << "double real + float imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
            os << "bfloat16";
            break;
        default:
            break;
    }

    return os;
}

cv::Mat normalization(std::string imageFilepath, std::vector<int64_t> inputDims){
    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // cv::Size is (width, height) = (dims[3], dims[2]); keep fx/fy at 0 so
    // INTER_CUBIC is passed as the interpolation flag rather than as fx.
    cv::resize(imageBGR, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)), 0, 0,
               cv::InterpolationFlags::INTER_CUBIC);
    cv::cvtColor(resizedImageBGR, resizedImageRGB,
                 cv::ColorConversionCodes::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    // Normalization per channel
    // Normalization parameters obtained from
    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;
    cv::merge(channels, 3, resizedImage);
    // HWC to CHW
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);
    return preprocessedImage;
}


std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

int main(int argc, char* argv[])
{
    bool useCUDA{true};
    const char* useCUDAFlag = "--use_cuda";
    const char* useCPUFlag = "--use_cpu";
    if (argc == 1)
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) == 0))
    {
        useCUDA = true;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCPUFlag) == 0))
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) != 0))
    {
        useCUDA = false;
    }
    else
    {
        throw std::runtime_error{"Too many arguments."};
    }

    if (useCUDA)
    {
        std::cout << "Inference Execution Provider: CUDA" << std::endl;
    }
    else
    {
        std::cout << "Inference Execution Provider: CPU" << std::endl;
    }

    // Define data and model
    std::string instanceName{"image-classification-inference"};
    std::string modelFilepath{ONNX_MODEL_PATH};
    std::string imageFilepath{IMG_PATH};
    std::string labelFilepath{LABEL_PATH};

    std::vector<std::string> labels{readLabels(labelFilepath)};

   
    
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                 instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);
    if (useCUDA)
    {
        // Using CUDA backend
        // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h#L13
        OrtStatus* status =
            OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
    }

    // Sets graph optimization level
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
    // (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;
    
    // Normalize image
    cv::Mat preprocessedImage = normalization(imageFilepath, inputDims);

    
    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    assert(("Output tensor size should equal to the label set size.",
            labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    std::vector<Ort::Value> inputTensors;
    std::vector<Ort::Value> outputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));
    outputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L353
    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);

    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    for (int i = 0; i < labels.size(); i++)
    {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum
              << std::endl;

    // Measure latency
    int numTests{100};
    std::chrono::steady_clock::time_point begin =
        std::chrono::steady_clock::now();
    for (int i = 0; i < numTests; i++)
    {
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
    }
    std::chrono::steady_clock::time_point end =
        std::chrono::steady_clock::now();
    std::cout << "Minimum Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end -
                                                                       begin)
                         .count() /
                     static_cast<float>(numTests)
              << " ms" << std::endl;
}

Reference: madara-tribe/cpp_sample_onnx_inference

5、resnet101

#include <iostream>
#include <string>
#include <vector>
#include <random>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
//#include "segmentation_onnx.h"
//#include <onnxruntime_cxx_inline.h>
//#include <cuda_provider_factory.h>
//#include <cpu_provider_factory.h>
//#include <onnxruntime_cxx_api.h>

#include <onnxruntime/core/session/onnxruntime_cxx_api.h>
#include "onnxruntime/core/providers/cpu/cpu_provider_factory.h"
#include "onnxruntime/core/providers/cuda/cuda_provider_factory.h"
#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h"


#include <chrono>

static constexpr const int width_ = 640;
static constexpr const int height_ = 480;
static constexpr const int channel = 4;

std::array<float, 1 * width_ * height_ * channel> input_image_{};
//std::array<float, 1 * width_ * height_ * 1>              results_{};
cv::Mat results_ = cv::Mat::zeros(height_, width_, CV_32F);

#ifdef _WIN32
//const wchar_t* model_path = L"4channels384_640.onnx";
//const wchar_t* model_path = L"D:/pengt/code/Cplus/onnx_model/resnet101_21_384x640.onnx";
const wchar_t* model_path = L"D:/pengt/code/Cplus/onnx_model/resnet101_21_480x640.onnx";

#else
const char* model_path = "4channels384_640.onnx";
#endif


#define USE_CUDA

class ONNX_Model
{
public:
#ifdef _WIN32
    ONNX_Model(const wchar_t* model_path)
        : m_env{ ORT_LOGGING_LEVEL_ERROR, "" },
        m_session{ nullptr },
        m_sess_opts{},
        m_mem_info{ Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault) }
#else
    ONNX_Model(const char* model_path)
        : m_env{ ORT_LOGGING_LEVEL_ERROR, "" },
        m_session{ nullptr },
        m_sess_opts{},
        m_mem_info{ Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault) }
#endif
    {
         //option
         /*  if (true)
            m_sess_opts.EnableCpuMemArena();
        else
            m_sess_opts.DisableCpuMemArena();*/

        //end
        
        /*
        // DirectML
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_DML(m_sess_opts, 0));
        m_sess_opts.DisableMemPattern();
        m_sess_opts.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
        m_session = Ort::Session(m_env, model_path, m_sess_opts);
        */

        
        // CPU
       // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(m_sess_opts, 0));
       // m_sess_opts.EnableMemPattern();
       // m_sess_opts.SetIntraOpNumThreads(8);
        //m_sess_opts.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
       // m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
       // m_session = Ort::Session(m_env, model_path, m_sess_opts);
        //tensorrt
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(m_sess_opts,0));
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(m_sess_opts, 0));

        //m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
        m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

        //m_sess_opts.SetOptimizedModelFilePath(out_optimize_path);  //save optimize

        m_session = Ort::Session(m_env, model_path, m_sess_opts);
        //m_session = Ort::Session(m_env, out_optimize_path, m_sess_opts);
        //end tensorrt
        

         // CUDA
        //Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(m_sess_opts, 0));
        //m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
        //m_session = Ort::Session(m_env, model_path, m_sess_opts);
       
        
        //
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
        input_tensor_ = Ort::Value::CreateTensor<float>(memory_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
        //output_tensor_ = Ort::Value::CreateTensor<float>(memory_info, results_.data(), results_.size(), output_shape_.data(), output_shape_.size());
        output_tensor_ = Ort::Value::CreateTensor<float>(memory_info, (float*)results_.data, height_ * width_, output_shape_.data(), output_shape_.size());

    }
    void  Run()
    {
        m_session.Run(Ort::RunOptions{ nullptr }, input_names.data(), &input_tensor_, 1, output_names.data(), &output_tensor_, 1);
        return;
    }

    std::vector<int64_t> get_input_shape_from_session()
    {
        Ort::TypeInfo info = m_session.GetInputTypeInfo(0);
        auto tensor_info = info.GetTensorTypeAndShapeInfo();
        size_t dim_count = tensor_info.GetDimensionsCount();
        std::vector<int64_t> dims(dim_count);
        tensor_info.GetDimensions(dims.data(), dims.size());
        return dims;
    }
private:
    Ort::Env m_env;
    Ort::Session m_session;
    Ort::SessionOptions m_sess_opts;
    Ort::MemoryInfo m_mem_info;
    //std::vector<const char*> input_names{ "0" };
    //std::vector<const char*> output_names{ "293" };  //Unet

    std::vector<const char*> input_names{ "input.1" };
    std::vector<const char*> output_names{ "1207" };  //resnet101
  
    //std::vector<const char*> input_names{  "input.1" };
    //std::vector<const char*> output_names{  "1683" };  //resnet152

    Ort::Value              input_tensor_{ nullptr };
    std::array<int64_t, 4>  input_shape_{ 1, channel,  height_,width_ };
    Ort::Value              output_tensor_{ nullptr };
    std::array<int64_t, 4>  output_shape_{ 1, 1, height_,width_ };

};


// Data preprocessing expected by the model (numpy-style pseudocode):

//rgbm = rgbm / 255.
//mean = [0.485, 0.456, 0.406, 0]
//std = [0.229, 0.224, 0.225, 1]
//rgbm = rgbm - mean
//rgbm = rgbm / std
//x.transpose(2, 0, 1).astype('float32')

#if 0
void fill_data(cv::Mat input_img, cv::Mat pre_mask, float* output, const int index = 0)
{
    cv::Mat dst_img;
    cv::Mat dst_pre;
    cv::Scalar mean = cv::Scalar(0.485, 0.456, 0.406);
    cv::Scalar std = cv::Scalar(0.229, 0.224, 0.225);
    float scale = 0.00392;
    //cv::Scalar mean_ = cv::Scalar(109.125, 102.6, 91.35);
    //cv::Scalar std_ = cv::Scalar(0.0171, 0.01884, 0.01975);
    //if (images.depth() == CV_8U && ddepth == CV_32F)
    //    images.convertTo(images[i], CV_32F);
    //std::swap(mean[0], mean[2]);
    input_img.convertTo(dst_img, CV_32F, scale);
    pre_mask.convertTo(dst_pre, CV_32F, scale);

    dst_img -= mean;
    dst_img /= std;

    //dst_img -= mean_;
    //dst_img *= std_;
    //std::vector<cv::Mat> channels;
    //split(input_img, channels); // split channels
    //channels.push_back(pre_mask);
    //cv::convertTo(img_float, CV_32F, 1.0 / 255);

    int row = dst_img.rows;
    int col = dst_img.cols;

    //cv::Scalar rgb_mean = cv::mean(dst);
    //std::cout<< (dst_img.ptr<float>(0, 214)[0]) <<std::endl;


    for (int c = 0; c < 3; c++) {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                //output[c * row * col + i * col + j] = (dst_img.ptr<uchar>(i)[j * 3 + c]);
                //std::cout << "i and j :"<<i<<","<<j<<"="<<(dst_img.ptr<float>(i,j)[c]) << std::endl;
                output[c * row * col + i * col + j] = (dst_img.ptr<float>(i, j)[c]);
            }
        }
    }
    if (index % 20 == 0)
    {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                //output[4 * row * col + i * col + j] = (dst_pre.ptr<uchar>(i)[j]);
                output[3 * row * col + i * col + j] = (dst_pre.ptr<float>(i, j)[0]);
            }
        }
    }

    return;
}
#else
void fill_data(cv::Mat input_img, cv::Mat pre_mask, float* output, const int index = 0)
{
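    // Folded form of the preprocessing listed in the comment block above:
    // out = (pixel/255 - m)/s  ==  pixel * (1/(255*s_c)) - m_c/s_c.
    // "std" below holds 1/(255*[0.229, 0.224, 0.225]), "allmul" holds mean[c]*std[c],
    // and "mean" (255*[0.485, 0.456, 0.406]) is kept only for reference.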
    float mean[3] = { 123.675, 116.28, 103.53 };
    float std[3] = { 0.0171, 0.0175, 0.0174 };
    float allmul[3] = { 2.1145,2.0349,1.8014 };
    float scale = 1;
    float* dst_pre = pre_mask.ptr<float>(0, 0);
    int row = input_img.rows;
    int col = input_img.cols;
    int alllen = row * col;
    //double timeStart = (double)cv::getTickCount();
    //HWC -> CHW
    float* inbuf = output;
    for (int c = 0; c < 3; c++) {
        uchar* img_data = input_img.data;
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                *inbuf = img_data[c] * std[c] - allmul[c];//(dst_img.ptr<float>(i, j)[c]);
                img_data += 3;
                inbuf++;
            }
        }
    }

    //double circle = ((double)cv::getTickCount() - timeStart) / cv::getTickFrequency();
    //std::cout << "circle time  :" << circle << " sec";
    memcpy(inbuf, dst_pre, sizeof(float) * alllen);

    return;
}
#endif

using namespace cv;
std::vector<Vec3b> colors;

void colorizeSegmentation(const Mat& score, Mat& segm)
{
    const int rows = score.size[2];
    const int cols = score.size[3];
    const int chns = score.size[1];

    if (colors.empty()) {
        // Generate colors.
        colors.push_back(Vec3b());
        for (int i = 1; i < chns; ++i) {
            Vec3b color;
            for (int j = 0; j < 3; ++j)
                color[j] = (colors[i - 1][j] + rand() % 256) / 2;
            colors.push_back(color);
        }
    }
    else if (chns != (int)colors.size()) {
        CV_Error(Error::StsError, format("Number of output classes does not match "
            "number of colors (%d != %zu)", chns, colors.size()));
    }

    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
    Mat maxVal(rows, cols, CV_32FC1, score.data);
    for (int ch = 1; ch < chns; ch++) {
        for (int row = 0; row < rows; row++) {
            const float* ptrScore = score.ptr<float>(0, ch, row);
            uint8_t* ptrMaxCl = maxCl.ptr<uint8_t>(row);
            float* ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++) {
                if (ptrScore[col] > ptrMaxVal[col]) {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = (uchar)ch;
                }
            }
        }
    }

    segm.create(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++) {
        const uchar* ptrMaxCl = maxCl.ptr<uchar>(row);
        Vec3b* ptrSegm = segm.ptr<Vec3b>(row);
        for (int col = 0; col < cols; col++) {
            ptrSegm[col] = colors[ptrMaxCl[col]];
        }
    }
}

// background image, foreground frame, mask
cv::Mat replace_and_blend(cv::Mat bkimg, cv::Mat& frame, cv::Mat& mask)
{
    cv::Mat result = cv::Mat::zeros(frame.size(), frame.type());
    int h = frame.rows;
    int w = frame.cols;
    int m = 0;
    double wt = 0;

    int b = 0, g = 0, r = 0;
    int b1 = 0, g1 = 0, r1 = 0;
    int b2 = 0, g2 = 0, r2 = 0;

    for (int row = 0; row < h; row++)
    {
        uchar* current = frame.ptr<uchar>(row);
        uchar* bgrow = bkimg.ptr<uchar>(row);
        uchar* maskrow = mask.ptr<uchar>(row);
        uchar* targetrow = result.ptr<uchar>(row);

        for (int col = 0; col < w; col++)
        {
            m = *maskrow++;
            if (m == 0)   // background pixel: use the background image data
            {
                *targetrow++ = *bgrow++;
                *targetrow++ = *bgrow++;
                *targetrow++ = *bgrow++;
                current += 3;
            }
            else if (m == 255) // foreground pixel: keep the original frame data
            {
                *targetrow++ = *current++;
                *targetrow++ = *current++;
                *targetrow++ = *current++;
                bgrow += 3;
            }
            else // transition band left by the morphological smoothing: blend the two with per-pixel weights
            {
                b1 = *bgrow++;
                g1 = *bgrow++;
                r1 = *bgrow++;

                b2 = *current++;
                g2 = *current++;
                r2 = *current++;

                wt = m / 255.0;

                b = b2 * wt + b1 * (1 - wt);
                g = g2 * wt + g1 * (1 - wt);
                r = r2 * wt + r1 * (1 - wt);

                *targetrow++ = b;
                *targetrow++ = g;
                *targetrow++ = r;

            }
        }
    }
    return result;  // return the blended result
}


#if 0
int main(void)
{
    ONNX_Model model;
    std::vector<int64_t> dims = model.get_input_shape_from_session();
    std::cout << "Input Shape: (";
    std::cout << dims[0] << ", " << dims[1] << ", " << dims[2] << ", " << dims[3] << ")" << std::endl;
    int inputwidth = 640;
    int inputheight = 384;
    cv::Mat pre_mask = cv::Mat::zeros(inputheight, inputwidth, CV_8UC1);  //height  width
    cv::Mat frame, image0;
    frame = cv::imread("D:/pengt/segmetation/test_pic/1.png");
    cv::resize(frame, frame, cv::Size(inputwidth, inputheight));  //width height
    cv::cvtColor(frame, image0, cv::COLOR_BGR2RGB);

    double timeStart = (double)cv::getTickCount();
    float* output = input_image_.data();

    std::fill(input_image_.begin(), input_image_.end(), 0.f);
    fill_data(image0, pre_mask, output);

    model.Run();
    double nTime = ((double)cv::getTickCount() - timeStart) / cv::getTickFrequency();
    std::cout << "running time: " << nTime << " sec\n" << std::endl;

    cv::Mat segm;
    //colorizeSegmentation(results_, segm);
    cv::threshold(results_, segm, 0.5, 200, cv::THRESH_BINARY);

    cv::imshow("mask", segm);
    cv::imshow("pre_image", frame);
    cv::waitKey(0);

    return 0;
}


#else
int main(void)
{
    ONNX_Model model(model_path);
    std::vector<int64_t> dims = model.get_input_shape_from_session();
    std::cout << "Input Shape: (";
    std::cout << dims[0] << ", " << dims[1] << ", " << dims[2] << ", " << dims[3] << ")" << std::endl;
    int inputwidth = width_;
    int inputheight = height_;
    cv::Mat pre_mask = cv::Mat::zeros(height_, width_, CV_32F);//cv::Mat::zeros(inputheight, inputwidth, CV_8UC1);  //height  width
    cv::Mat frame, image0;
    cv::Mat backimg = cv::imread("0.jpg");
    cv::resize(backimg, backimg, cv::Size(inputwidth, inputheight));

    cv::VideoCapture capture(0);
    capture.set(cv::CAP_PROP_FRAME_WIDTH, 640);

    capture.set(cv::CAP_PROP_FRAME_HEIGHT, 360);
    int index = 0;
   /// object_rect res_area;

    // To rotate 90° clockwise:         transpose(src, tmp) + flip(tmp, dst, 1)

    // To rotate 90° counter-clockwise: transpose(src, tmp) + flip(tmp, dst, 0)

    while (true)
    {
        index += 1;
        capture >> frame;

        //cv::transpose(frame, rotef);
        //cv::Mat rodst;
        //cv::rotate(frame, rodst, 0); // rotate 90° clockwise
        //cv::rotate(frame, rodst, 2); // rotate 270° clockwise (90° counter-clockwise)


         cv::Mat sizeFrame;
        double timeStart = (double)cv::getTickCount();
        //resize_uniform(frame, sizeFrame, cv::Size(inputwidth, inputheight), res_area);

        cv::resize(frame, sizeFrame, cv::Size(inputwidth, inputheight));  //width height
        cv::cvtColor(sizeFrame, image0, cv::COLOR_BGR2RGB);


        float* output = input_image_.data();

        // std::fill(input_image_.begin(), input_image_.end(), 0.f);  // temporarily removed
        fill_data(image0, pre_mask, output, index);

        double midpredict = (double)cv::getTickCount();

        double postTime = (midpredict - timeStart) / cv::getTickFrequency();
        std::cout << "  pre-process time: " << postTime << " sec";

        model.Run();
        double stpreTime = (double)cv::getTickCount();
        double preTime = (stpreTime - midpredict) / cv::getTickFrequency();
        std::cout << "  predict time  :" << preTime << "sec";


        cv::Mat segm;
        const int inputMaskValue = 1;
        cv::threshold(results_, segm, 0.5, 1, cv::THRESH_BINARY);

        // contour extraction (optional, commented out below)
       // cv::Mat binary = segm * 255;

       // //cv::threshold(segm * 255, binary, 127, 255, cv::THRESH_BINARY);
       // cv::Mat ubinary;
       // binary.convertTo(ubinary, CV_8U);
       ///* cv::Canny(ubinary, ubinary, 60, 255, 3); // Canny could replace all of this
       // cv::imshow("canny", ubinary);*/
       // std::vector<std::vector<cv::Point>> contours;
       // std::vector<cv::Vec4i> hierarchy;

       // cv::findContours(ubinary, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE, cv::Point());
       // cv::Mat imageContours = cv::Mat::zeros(ubinary.size(), CV_8UC1);
       // for (int i = 0; i < contours.size(); i++)
       // {
       //     cv::drawContours(imageContours, contours, i, cv::Scalar(255), 1, 8, hierarchy);
       // }

       // double preTime = ((double)cv::getTickCount() - midpredict) / cv::getTickFrequency();

        //std::cout << "predict time  :" << preTime << "sec\n" << std::endl;
        //cv::imshow("edge", imageContours);

        /// 


        segm.copyTo(pre_mask);
        pre_mask = 0.7 * pre_mask;


        cv::Mat ucharSegem;
        segm = segm * 255;
        segm.convertTo(ucharSegem, CV_8U);

        ///
         // morphological processing to smooth the mask
        cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
        cv::Mat dstmask;
        cv::morphologyEx(ucharSegem, dstmask, cv::MORPH_OPEN, kernel);
        //cv::imshow("mask1", dstmask);
        // Gaussian blur for smoother, nicer-looking edges
        cv::GaussianBlur(dstmask, dstmask, cv::Size(3, 3), 0, 0);
        //cv::imshow("mask2", dstmask);
        cv::Mat dst = replace_and_blend(backimg, sizeFrame, dstmask);

        //Mat resultdst;
        //crop_effect_area(dst, resultdst, frame.size(), res_area);

        double lastTime = ((double)cv::getTickCount() - stpreTime) / cv::getTickFrequency();

        std::cout << "  post-process time: " << lastTime << " sec\n" << std::endl;

        cv::imshow("mask", ucharSegem);
       // cv::imshow("resultdst", resultdst);
        cv::imshow("dst", dst);
        //cv::Mat dst = cv::addWeighted(img1, 0.7, img2, 0.3, 0);
        //cv::imshow("mask", segm);

        //crop_effect_area(Mat & uniform_scaled, Mat & dst, Size ori_size, object_rect effect_area);

        //cv::imshow("pre_image", frame);
        cv::waitKey(1);

    }

    return 0;
}


#endif

Reference: ptklx/onnxruntime_segment

6、CenterNet_onnxruntime

#include <core/session/onnxruntime_cxx_api.h>
#include <core/providers/cuda/cuda_provider_factory.h>
#include <core/session/onnxruntime_c_api.h>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <sys/time.h>
#include <math.h>
 
using namespace cv;
using namespace std;
 

 
#define PI 3.14159265358979323846
 
struct Box
{
    float x1;
    float y1;
    float x2;
    float y2;
    float ang;
};
 
struct Detection
{
    Box bbox;
    int classId;
    float prob;
};

float Logist(float data)
{
    return 1. / (1. + exp(-data));
}
 
void preProcess(cv::Mat &img, float *output)
{
    int input_w = 512;
    int input_h = 512;
    float scale = cv::min(float(input_w) / img.cols, float(input_h) / img.rows);
    auto scaleSize = cv::Size(img.cols * scale, img.rows * scale);
 
    cv::Mat resized;
    cv::resize(img, resized, scaleSize, 0, 0);
    cv::Mat cropped = cv::Mat::zeros(input_h, input_w, CV_8UC3);
    cv::Rect rect((input_w - scaleSize.width) / 2, (input_h - scaleSize.height) / 2, scaleSize.width, scaleSize.height);
 
    resized.copyTo(cropped(rect));
    // imwrite("img_process.png", cropped); 
 
    constexpr static float mean[] = {0.5194416012442385, 0.5378052387430711, 0.533462090585746};
    constexpr static float std[] = {0.3001546018824507, 0.28620901391179554, 0.3014112676161966};
    int row = 512;
    int col = 512;
    for (int c = 0; c < 3; c++)
    {
        for (int i = 0; i < row; i++)
        {
            for (int j = 0; j < col; j++)
            {
                float pix = cropped.ptr<uchar>(i)[j * 3 + c];
                output[c * row * col + i * col + j] = (pix / 255. - mean[c]) / std[c];
            }
        }
    }
}
 
void postProcess(const float *hm, const float *wh, const float *ang, const float *reg,
                vector<Detection> &result, const int w, const int h, const int classes,
                const int kernel_size,const float visthresh)
{
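    // Note: a cell of the hm map is kept as a detection only if sigmoid(hm) exceeds
    // visthresh AND it is the maximum within its kernel_size x kernel_size window,
    // which emulates CenterNet's max-pool style NMS on the heatmap.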
    int flag = 0;
    for (int idx = 0; idx < w * h * classes; idx++)
    {
        if (idx >= w * h * classes)
            return;
        int padding = (kernel_size - 1) / 2; //1
        int offset = -padding;               //-1
        int stride = w * h;                  //128*128
        int grid_x = idx % w;                // x coordinate (column)
        int grid_y = (idx / w) % h;          // y coordinate (row)
        int cls = idx / w / h;               // class index
        int l, m;
        int reg_index = idx - cls * stride; // position within one feature map, in [0, 128*128)
        float c_x, c_y;
        float objProb = Logist(hm[idx]);
        flag += 1;
 
        if (objProb > visthresh)
        {
            float max = -1;
            int max_index = 0;
            for (l = 0; l < kernel_size; ++l)
                for (m = 0; m < kernel_size; ++m)
                {
                    int cur_x = offset + l + grid_x;
                    int cur_y = offset + m + grid_y;
                    int cur_index = cur_y * w + cur_x + stride * cls;
                    int valid = (cur_x >= 0 && cur_x < w && cur_y >= 0 && cur_y < h);
                    float val = (valid != 0) ? Logist(hm[cur_index]) : -1;
                    max_index = (val > max) ? cur_index : max_index;
                    max = (val > max) ? val : max;
                }
 
            if (idx == max_index)
            {
                Detection det;
                c_x = grid_x + reg[reg_index];
                c_y = grid_y + reg[reg_index + stride];
                float angle = ang[reg_index];
 
                det.bbox.x1 = (c_x - wh[reg_index] / 2) * 4;
                det.bbox.y1 = (c_y - wh[reg_index + stride] / 2) * 4;
                det.bbox.x2 = (c_x + wh[reg_index] / 2) * 4;
                det.bbox.y2 = (c_y + wh[reg_index + stride] / 2) * 4;
                det.bbox.ang = angle;
                det.classId = cls;
                det.prob = objProb;
                result.push_back(det);
            }
        }
    }
}
 
void resultCorrect(std::vector<Detection> &result, const cv::Mat &img)
{
    int input_w = 512;
    int input_h = 512;
    float scale = min(float(input_w) / img.cols, float(input_h) / img.rows);
    float dx = (input_w - scale * img.cols) / 2;
    float dy = (input_h - scale * img.rows) / 2;
    for (auto &item : result)
    {
        float x1 = (item.bbox.x1 - dx) / scale;
        float y1 = (item.bbox.y1 - dy) / scale;
        float x2 = (item.bbox.x2 - dx) / scale;
        float y2 = (item.bbox.y2 - dy) / scale;
        x1 = (x1 > 0) ? x1 : 0;
        y1 = (y1 > 0) ? y1 : 0;
        x2 = (x2 < img.cols) ? x2 : img.cols - 1;
        y2 = (y2 < img.rows) ? y2 : img.rows - 1;
        item.bbox.x1 = x1;
        item.bbox.y1 = y1;
        item.bbox.x2 = x2;
        item.bbox.y2 = y2;
    }
}

void draw(const std::vector<Detection> &result, cv::Mat &img)
{
    for (const auto &item : result)
    {
        float ang = item.bbox.ang;
        float cx = (item.bbox.x1 + item.bbox.x2) / 2;
        float cy = (item.bbox.y1 + item.bbox.y2) / 2;
        float height = (item.bbox.x2 - item.bbox.x1);
        float width = (item.bbox.y2 - item.bbox.y1);
        float anglePi = ang / 180 * PI;
        anglePi = anglePi < PI ? anglePi : anglePi - PI;
        float cosA = cos(anglePi);
        float sinA = sin(anglePi);
        float x1 = cx - 0.5 * width;
        float y1 = cy - 0.5 * height;
 
        float x0 = cx + 0.5 * width;
        float y0 = y1;
 
        float x2 = x1;
        float y2 = cy + 0.5 * height;
 
        float x3 = x0;
        float y3 = y2;
 
        int x0n = floor((x0 - cx) * cosA - (y0 - cy) * sinA + cx);
        int y0n = floor((x0 - cx) * sinA + (y0 - cy) * cosA + cy);
 
        int x1n = floor((x1 - cx) * cosA - (y1 - cy) * sinA + cx);
        int y1n = floor((x1 - cx) * sinA + (y1 - cy) * cosA + cy);
 
        int x2n = floor((x2 - cx) * cosA - (y2 - cy) * sinA + cx);
        int y2n = floor((x2 - cx) * sinA + (y2 - cy) * cosA + cy);
 
        int x3n = floor((x3 - cx) * cosA - (y3 - cy) * sinA + cx);
        int y3n = floor((x3 - cx) * sinA + (y3 - cy) * cosA + cy);
 
        cv::line(img, cv::Point(x0n, y0n), cv::Point(x1n, y1n), cv::Scalar(0, 0, 255), 3, 8, 0);
        cv::line(img, cv::Point(x1n, y1n), cv::Point(x2n, y2n), cv::Scalar(255, 0, 0), 3, 8, 0);
        cv::line(img, cv::Point(x2n, y2n), cv::Point(x3n, y3n), cv::Scalar(0, 0, 255), 3, 8, 0);
        cv::line(img, cv::Point(x3n, y3n), cv::Point(x0n, y0n), cv::Scalar(255, 0, 0), 3, 8, 0);
    }
}
 
int main(int argc, const char** argv)
{
    if (argc !=3)
	{
		std::cout << "you should input: \n./predict your_model_path//your_model.onnx your_img_path//your_img.jpg " << std::endl;
		return -1;
	}
	std::string model_path = argv[1];
	std::string image_file = argv[2];
    // 1. Load the model
    Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "R-CenterNet"};
    Ort::SessionOptions session_option;
    session_option.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    Ort::Session session_{env, model_path.c_str(), session_option};

    // 2. Define the input/output node names. Inspect the model with Netron; here they are 0, 512, 515, 518 and 521, corresponding to the input image and the hm, wh, ang, reg outputs.
    std::vector<const char *> input_names = {"0"};
    const char *const output_names[] = {"512", "515", "518", "521"};
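    // Sketch (not in the original): the same names can also be queried at runtime
    // instead of reading them off Netron, using the same API as the other samples:
    //   Ort::AllocatorWithDefaultOptions alloc;
    //   char* in0  = session_.GetInputName(0, alloc);   // -> "0"
    //   char* out0 = session_.GetOutputName(0, alloc);  // -> "512"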

    // 3. Load the image to run inference on
    Mat img = imread(image_file);

    // 4. Prepare an input tensor for the loaded image; for other models only the 512, 512 sizes need changing
    Ort::Value input_tensor_{nullptr};
    std::array<float, 1 * 3 * 512 * 512> input_image_{};
    std::array<int64_t, 4> input_shape_{1, 3, 512, 512};

    auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
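    // Note: CreateTensor wraps the existing input_image_ buffer (no copy is made),
    // so writing through input_float below is enough for the tensor to see the data.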
    
    float *input_float = input_image_.data();
    fill(input_image_.begin(), input_image_.end(), 0.f);

    // 5. Preprocessing: resize the input image to the size the network expects and normalize it
    preProcess(img, input_float);

    // 6. Run inference
    std::vector<Ort::Value> ort_outputs = session_.Run(Ort::RunOptions{nullptr}, input_names.data(),
                                                       &input_tensor_, 1, output_names, 4);
 
    // 7. Post-processing: extract object coordinates, width/height, angle and offsets; ort_outputs[0:4] -> hm, wh, ang, reg
    vector<Detection> result;
    postProcess(ort_outputs[0].GetTensorMutableData<float>(), ort_outputs[1].GetTensorMutableData<float>(),
                 ort_outputs[2].GetTensorMutableData<float>(), ort_outputs[3].GetTensorMutableData<float>(),
                 result, 128, 128, 1, 3, 0.3);

    // 8. Correct the coordinates: clamp negative values to 0 and values beyond the image size to the image border
    resultCorrect(result, img);

    // 9. Draw the results
    draw(result, img);
    imwrite("result.jpg", img);
    return 0;
}

Reference: ZeroE04/CenterNet_onnxruntime

7、Semantic segmentation model

Writing code with ONNX Runtime is quite concise.
It breaks down into roughly three parts:

  • 1. Initialize the environment, session, etc.
  • 2. Load the model in the session and obtain the model's input and output nodes
  • 3. Call the API to get the model's outputs
    A minimal skeleton of these three steps is sketched right below, followed by the full example using the semantic segmentation model U2Net.
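A bare-bones sketch of the three steps (placeholder model path and node names, not the actual U2Net graph):

Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "demo");         // 1. environment (one per process)
Ort::SessionOptions opts;
Ort::Session session(env, L"model.onnx", opts);           // 2. load the model into a session
// query the input/output node names with session.GetInputName()/GetOutputName(),
// build the input tensor, then:
// auto outputs = session.Run(Ort::RunOptions{ nullptr },  // 3. inference
//                            input_names.data(), &input_tensor, 1,
//                            output_names.data(), 1);

The full U2Net example: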
#include <assert.h>
#include <vector>
#include<ctime>
#include <onnxruntime_cxx_api.h>
int main(int argc, char* argv[]) 
{
    // record the program's running time
    auto start_time = clock();
    // Initialize the environment; one environment per process
    // The environment holds the thread pools and other state information
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
    // Initialize session options
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
    // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible opitmizations
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    //*************************************************************************
    // Create the session and load the model into memory
    const wchar_t* model_path = L"u2net.onnx";

    printf("Using Onnxruntime C++ API\n");
    Ort::Session session(env, model_path, session_options);

    //*************************************************************************
    // Print the model's input nodes (node names, types, shape, etc.)
    Ort::AllocatorWithDefaultOptions allocator;

    // Number of model input nodes
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    std::vector<const char*> input_node_names(num_input_nodes);
    std::vector<const char*> output_node_names(num_output_nodes);
    std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224}.
                                           // Otherwise need vector<vector<>>

    printf("Number of inputs = %zu\n", num_input_nodes);
    // Iterate over all input nodes
    for (int i = 0; i < num_input_nodes; i++) {
         // Print the input node's name
        char* input_name = session.GetInputName(i, allocator);
        printf("Input %d : name=%s\n", i, input_name);
        input_node_names[i] = input_name;

        // Print the input node's type
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

        ONNXTensorElementDataType type = tensor_info.GetElementType();
        printf("Input %d : type=%d\n", i, type);

        input_node_dims = tensor_info.GetShape();
        // Print the input node's number of dimensions
          printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
        // Print the size of each dimension
          for (int j = 0; j < input_node_dims.size(); j++)
             printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
        //batch_size=1
        input_node_dims[0] = 1;
    }
    // Print the output node info in the same way
    for (int i = 0; i < num_output_nodes; i++)
    {
        char* output_name = session.GetOutputName(i, allocator);
        printf("Output: %d name=%s\n", i, output_name);
        output_node_names[i] = output_name;
        Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        printf("Output %d : type=%d\n", i, type);
        auto output_node_dims = tensor_info.GetShape();
        printf("Output %d : num_dims=%zu\n", i, output_node_dims.size());
        for (int j = 0; j < output_node_dims.size(); j++)
            printf("Output %d : dim %d=%jd\n", i, j, output_node_dims[j]);
    }
    
    //*************************************************************************
    // Score the model with sample data and sanity-check the input/output values
    size_t input_tensor_size = 3 * 320 * 320;  // simplify ... using known dim values to calculate size
                                               // use OrtGetTensorShapeElementCount() to get official size!
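    // Sketch (not in the original): the element count can also be read from the
    // session instead of being hard-coded (assuming all input dims are static):
    //   size_t n = session.GetInputTypeInfo(0)
    //                     .GetTensorTypeAndShapeInfo().GetElementCount();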
    
    std::vector<float> input_tensor_values(input_tensor_size);

    // Initialize dummy data (for demonstration; in practice, pass in normalized image data here)
    for (unsigned int i = 0; i < input_tensor_size; i++)
        input_tensor_values[i] = (float)i / (input_tensor_size + 1);
    
    // Create a tensor object for the input data
    try
    {
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
        //assert(input_tensor.IsTensor());
    
	    // Run inference to get the results
	    auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
	    assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
	
	    // Get pointer to output tensor float values
	    float* floatarr = output_tensors.front().GetTensorMutableData<float>(); 
	    printf("Number of outputs = %zu\n", output_tensors.size());
    }
    catch (Ort::Exception& e)
    {
        printf("%s\n", e.what());
    }
    auto end_time = clock();
    printf("Process exits after %.2f seconds\n", static_cast<float>(end_time - start_time) / CLOCKS_PER_SEC);
    printf("Done!\n");
    return 0;
}

Output:
(screenshot of the console output omitted)
Next, we wrap this up a bit further into a class to make it easier to reuse.

class U2NetModel
{
public:
    U2NetModel(const wchar_t* onnx_model_path);
    float* predict(std::vector<float>input_data,int batch_size=1);
private:
    Ort::Env env;
    Ort::Session session;
    Ort::AllocatorWithDefaultOptions allocator;
    std::vector<const char*>input_node_names;
    std::vector<const char*>output_node_names;
    std::vector<int64_t> input_node_dims;
};
U2NetModel::U2NetModel(const wchar_t* onnx_model_path):session(nullptr),env(nullptr)
{
    // Initialize the environment; one environment per process, holding the thread pools and other state
    this->env=Ort::Env(ORT_LOGGING_LEVEL_WARNING, "u2net");
    // Initialize session options
    Ort::SessionOptions session_options;
    session_options.SetInterOpNumThreads(1);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    // Create the session and load the model into memory
    this->session=Ort::Session(env, onnx_model_path,session_options);
    // Number and names of input/output nodes
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    for (int i = 0; i < num_input_nodes; i++)
    {
        auto input_node_name = session.GetInputName(i, allocator);
        this->input_node_names.push_back(input_node_name);
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        this->input_node_dims = tensor_info.GetShape();
    }
    for (int i = 0; i < num_output_nodes; i++)
    {
        auto output_node_name = session.GetOutputName(i, allocator);
        this->output_node_names.push_back(output_node_name);
    }
}
float* U2NetModel::predict(std::vector<float>input_tensor_values,int batch_size)
{
    this->input_node_dims[0] = batch_size;
    auto input_tensor_size = input_tensor_values.size();
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
    auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
    assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
    float* floatarr = output_tensors.front().GetTensorMutableData<float>();
    return floatarr;
}

Then create an instance and call the interface:

int main(int argc, char* argv[]) 
{
    auto start_time = std::clock();
    U2NetModel u2net(L"u2net.onnx");
    size_t input_tensor_size = 3 * 320 * 320;
    std::vector<float> input_tensor_values(input_tensor_size);

    // Initialize dummy data (for demonstration)
    for (unsigned int i = 0; i < input_tensor_size; i++)
    {
        input_tensor_values[i] = (float)i / (input_tensor_size + 1);
    }
    float* results = nullptr;
    try
    {
        results = u2net.predict(input_tensor_values);
    }
    catch (Ort::Exception& e)
    {
        // results is still nullptr here (and is never owned by the caller), so there is nothing to free
        printf("%s\n", e.what());
    }
    auto end_time = std::clock();
    printf("Process exits after %.2f seconds\n", static_cast<float>(end_time - start_time) / CLOCKS_PER_SEC);
    printf("Done!\n");
    return 0;
}

The model part is now done, but the problem is that we still have no way to see how the model actually performs.
So we also need to read in an image and display it, that is:
1. Read an image
2. Run model inference
3. Display the image on screen

Model inference with the help of OpenCV

First, overload a new predict function to support cv::Mat data.
Note that this new version no longer returns a raw float*, but a std::vector<float>: the raw pointer from GetTensorMutableData points into memory owned by the output Ort::Value, so it would dangle once the outputs returned by Run are destroyed.

class U2NetModel
{
public:
    ...
    std::vector<float> predict(std::vector<float>& input_data,int batch_size=1,int index=0);
    cv::Mat predict(cv::Mat& input_tensor, int batch_size = 1, int index = 0);
    ...
}

Implementation, with added handling for cv::Mat:

std::vector<float> U2NetModel::predict(std::vector<float>& input_tensor_values,int batch_size,int index)
{
    this->input_node_dims[0] = batch_size;
    this->output_node_dims[0] = batch_size;
    float* floatarr = nullptr;
    try
    {
        std::vector<const char*>output_node_names;
        if (index != -1)
        {
            output_node_names = { this->output_node_names[index] };
        }
        else
        {
            output_node_names = this->output_node_names;
        }
        this->input_node_dims[0] = batch_size;
        auto input_tensor_size = input_tensor_values.size();
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
        auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
        assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
        floatarr = output_tensors.front().GetTensorMutableData<float>();
    }
    catch (Ort::Exception&e)
    {
        throw e;
    }
    int64_t output_tensor_size = 1;
    for (auto& it : this->output_node_dims)
    {
        output_tensor_size *= it;
    }
    std::vector<float>results(output_tensor_size);
    for (unsigned i = 0;i < output_tensor_size; i++)
    {
        results[i] = floatarr[i];
    }
    return results;
}
cv::Mat U2NetModel::predict(cv::Mat& input_tensor, int batch_size, int index)
{
    int input_tensor_size = input_tensor.cols * input_tensor.rows * 3;
    std::size_t counter = 0; // allocate the std::vector once up front to avoid extra data copies
    std::vector<float>input_data(input_tensor_size);
    std::vector<float>output_data;
    try
    {
        for (unsigned k = 0; k < 3; k++)
        {
            for (unsigned i = 0; i < input_tensor.rows; i++)
            {
                for (unsigned j = 0; j < input_tensor.cols; j++)
                {
                    input_data[counter++]=static_cast<float>(input_tensor.at<cv::Vec3b>(i, j)[k]) / 255.0;
                }
            }
        }
    }
    catch (cv::Exception& e)
    {
        printf("%s\n", e.what());
    }
    try
    {
        output_data = this->predict(input_data);
    }
    catch (Ort::Exception& e)
    {
        throw e;
    }
    cv::Mat output_tensor(output_data);
    output_tensor=output_tensor.reshape(1, { 320,320 })*255.0;
    std::cout << output_tensor.rows << " " << output_tensor.cols << std::endl;  // debug print of the output size
    return output_tensor;
}

int main(int argc, char* argv[]) 
{
    U2NetModel model(L"u2net.onnx");
    cv::Mat image = cv::imread("horse.jpg");
    cv::resize(image, image, { 320, 320 },0.0,0.0, cv::INTER_CUBIC);// resize to 320*320
    cv::imshow("image", image);                                     // show the original image
    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);                  // convert BGR to RGB
    auto result=model.predict(image);                               // model prediction
    cv::imshow("result", result);                                   // show the result
    cv::waitKey(0);
}

(result image omitted)
At this point, deploying the model and displaying its result are working.
However, directly turning the raw model output into an image clearly does not give an ideal result,
so the data still needs post-processing: binarize the image
to obtain a mask matrix.

    cv::Mat output_tensor(output_data);
    output_tensor=255.0-output_tensor.reshape(1, { 320,320 })*255.0;
    cv::threshold(output_tensor, output_tensor, 220, 255, cv::THRESH_BINARY_INV);
    return output_tensor;

(result image omitted)
Now the result is quite satisfactory.

Complete test code

#include <assert.h>
#include <vector>
#include <ctime>
#include <iostream>
#include <onnxruntime_cxx_api.h>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
class U2NetModel
{
public:
    U2NetModel(const wchar_t* onnx_model_path);
    std::vector<float> predict(std::vector<float>& input_data,int batch_size=1,int index=0);
    cv::Mat predict(cv::Mat& input_tensor, int batch_size = 1, int index = 0);
private:
    Ort::Env env;
    Ort::Session session;
    Ort::AllocatorWithDefaultOptions allocator;
    std::vector<const char*>input_node_names;
    std::vector<const char*>output_node_names;
    std::vector<int64_t> input_node_dims;
    std::vector<int64_t> output_node_dims;
};
U2NetModel::U2NetModel(const wchar_t* onnx_model_path):session(nullptr),env(nullptr)
{
    // Initialize the environment; one environment per process, holding the thread pools and other state
    this->env=Ort::Env(ORT_LOGGING_LEVEL_WARNING, "u2net");
    // Initialize session options
    Ort::SessionOptions session_options;
    session_options.SetInterOpNumThreads(4);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    // Create the session and load the model into memory
    this->session=Ort::Session(env, onnx_model_path,session_options);
    // Number and names of input/output nodes
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    for (int i = 0; i < num_input_nodes; i++)
    {
        auto input_node_name = session.GetInputName(i, allocator);
        this->input_node_names.push_back(input_node_name);
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        this->input_node_dims = tensor_info.GetShape();
    }
    for (int i = 0; i < num_output_nodes; i++)
    {
        auto output_node_name = session.GetOutputName(i, allocator);
        this->output_node_names.push_back(output_node_name);
        Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        this->output_node_dims = tensor_info.GetShape();
    }
}
std::vector<float> U2NetModel::predict(std::vector<float>& input_tensor_values,int batch_size,int index)
{
    this->input_node_dims[0] = batch_size;
    this->output_node_dims[0] = batch_size;
    float* floatarr = nullptr;
    try
    {
        std::vector<const char*>output_node_names;
        if (index != -1)
        {
            output_node_names = { this->output_node_names[index] };
        }
        else
        {
            output_node_names = this->output_node_names;
        }
        this->input_node_dims[0] = batch_size;
        auto input_tensor_size = input_tensor_values.size();
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
        auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
        assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
        floatarr = output_tensors.front().GetTensorMutableData<float>();
    }
    catch (Ort::Exception&e)
    {
        throw e;
    }
    int64_t output_tensor_size = 1;
    for (auto& it : this->output_node_dims)
    {
        output_tensor_size *= it;
    }
    std::vector<float>results(output_tensor_size);
    for (unsigned i = 0;i < output_tensor_size; i++)
    {
        results[i] = floatarr[i];
    }
    return results;
}
cv::Mat U2NetModel::predict(cv::Mat& input_tensor, int batch_size, int index)
{
    int input_tensor_size = input_tensor.cols * input_tensor.rows * 3;
    std::size_t counter = 0; // allocate the std::vector once up front to avoid extra data copies
    std::vector<float>input_data(input_tensor_size);
    std::vector<float>output_data;
    try
    {
        for (unsigned k = 0; k < 3; k++)
        {
            for (unsigned i = 0; i < input_tensor.rows; i++)
            {
                for (unsigned j = 0; j < input_tensor.cols; j++)
                {
                    input_data[counter++]=static_cast<float>(input_tensor.at<cv::Vec3b>(i, j)[k]) / 255.0;
                }
            }
        }
    }
    catch (cv::Exception& e)
    {
        printf("%s\n", e.what());
    }
    try
    {
        output_data = this->predict(input_data);
    }
    catch (Ort::Exception& e)
    {
        throw e;
    }
    cv::Mat output_tensor(output_data);
    output_tensor=255.0-output_tensor.reshape(1, { 320,320 })*255.0;
    cv::threshold(output_tensor, output_tensor, 220, 255, cv::THRESH_BINARY_INV);
    
    return output_tensor;
}
int main(int argc, char* argv[]) 
{
    U2NetModel model(L"u2net.onnx");
    cv::Mat image = cv::imread("horse.jpg");
    cv::resize(image, image, { 320, 320 },0.0,0.0, cv::INTER_CUBIC);// resize to 320*320
    cv::imshow("image", image);                                     // show the original image
    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);                  // convert BGR to RGB
    auto result=model.predict(image);                               // model prediction
    cv::imshow("result", result);                                   // show the result
    cv::waitKey(0);
}

Reference: Deploying a neural-network semantic segmentation model in C++ (VS2019 + ONNXRuntime + OpenCV)
