ONNX Runtime C++ code collection


1、EfficientDet

EfficientDet.h

#include <core/session/onnxruntime_cxx_api.h>
#include <cstring>
#include <array>
#include <string>
#include <vector>
#include <cmath>
#include <algorithm>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "providers.h"
 
class EfficientDetOnnxRT {
 public:
 
  EfficientDetOnnxRT(std::string onnx_file,unsigned int numClasses); 
  
  ~EfficientDetOnnxRT(); 
 
  ...
  
  bool processInput(float* buffer,cv::Mat& cv_img_origin); 
 
  std::vector<std::vector<float>> processOutput(float* regressionOutputBuffer,float* classificationOutputBuffer,
                 float* anchorsOutputBuffer,float nms_threshold, float score_threshold); 
 
  std::vector<std::vector<float>> infer(cv::Mat& cv_img, float nms_threshold, float score_threshold ); 
 
 
 private:
  ...
 
  char* input_names[1]{nullptr};
  float* input_image_{nullptr};
  Ort::Value input_tensor_{(Ort::Value)nullptr};
  std::array<int64_t, 4> input_shape_;
 
  Ort::Env env_ {ORT_LOGGING_LEVEL_ERROR, "EfficientDetOnnxEnv"};  
  Ort::Session session_ {nullptr};
 
  char* output_names[3]{nullptr,nullptr,nullptr};
  Ort::Value output_tensor_[3]{(Ort::Value)nullptr,(Ort::Value)nullptr,(Ort::Value)nullptr};
  std::array<int64_t,3> output_shape_regression;     
  std::array<int64_t,3> output_shape_classification; 
  std::array<int64_t,3> output_shape_anchors;        
  float* results_regression{nullptr};    
  float* results_classification{nullptr}; 
  float* results_anchors{nullptr};       
 
  ...
 
};

EfficientDet.cpp

EfficientDetOnnxRT::EfficientDetOnnxRT(std::string onnx_file,unsigned int numClasses) {
 
    Ort::SessionOptions op;
    op.SetLogSeverityLevel(ORT_LOGGING_LEVEL_ERROR);
    int device_id = 0;
    
    std::cout <<"onnxruntime loading onnx model..." <<std::endl;
#if defined(USE_TENSORRT)
      std::cout <<"onnxruntime running with TensorRT..." <<std::endl;
      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(op, device_id));
      
#elif defined(USE_CUDA)
      OrtCUDAProviderOptions cuda_options{
          0,
          OrtCudnnConvAlgoSearch::EXHAUSTIVE,
          std::numeric_limits<size_t>::max(),
          0,
          true
      };
      op.AppendExecutionProvider_CUDA(cuda_options);
#endif
    session_ = Ort::Session(env_, onnx_file.c_str(), op);
 
    Ort::AllocatorWithDefaultOptions ort_alloc;
    char* tmp = session_.GetInputName(0, ort_alloc);
    input_names[0] = strdup(tmp);
    ort_alloc.Free(tmp);
    tmp = session_.GetOutputName(0, ort_alloc);
    output_names[0] = strdup(tmp);
    ort_alloc.Free(tmp);
    tmp = session_.GetOutputName(1, ort_alloc);
    output_names[1] = strdup(tmp);
    ort_alloc.Free(tmp);
    tmp = session_.GetOutputName(2, ort_alloc);
    output_names[2] = strdup(tmp);
    ort_alloc.Free(tmp);
   
    Ort::TypeInfo info = session_.GetInputTypeInfo(0); 
    auto tensor_info = info.GetTensorTypeAndShapeInfo();
    size_t dim_count = tensor_info.GetDimensionsCount();
    std::vector<int64_t> dims(dim_count);
    tensor_info.GetDimensions(dims.data(), dims.size());
    channels_ = dims[1];
    height_ = dims[2];
    width_  = dims[3];
    input_shape_[0]= dims[0];
    input_shape_[1]= channels_;
    input_shape_[2]= height_; 
    input_shape_[3]= width_;
 
 
    info = session_.GetOutputTypeInfo(0); 
    auto tensor_info2 = info.GetTensorTypeAndShapeInfo();
    dim_count = tensor_info2.GetDimensionsCount();
    dims.clear();
    dims.resize(dim_count);
    tensor_info2.GetDimensions(dims.data(), dims.size());
    for (int i=0; i< dims.size();i++)
      output_shape_regression[i] = dims[i];
 
  
    info = session_.GetOutputTypeInfo(1); 
    auto tensor_info3 = info.GetTensorTypeAndShapeInfo();
    dim_count = tensor_info3.GetDimensionsCount();
    dims.clear();
    dims.resize(dim_count);
    tensor_info3.GetDimensions(dims.data(), dims.size());
    for (int i=0; i< dims.size();i++)
      output_shape_classification[i] = dims[i];
 
    numClassScores_ = static_cast<unsigned int>(dims[2]);
    output_shape_classification[2] = numClassScores_;
    ...
 
    info = session_.GetOutputTypeInfo(2); 
    auto tensor_info4 = info.GetTensorTypeAndShapeInfo();
    dim_count = tensor_info4.GetDimensionsCount();
    dims.clear();
    dims.resize(dim_count);
    tensor_info4.GetDimensions(dims.data(), dims.size());
    for (int i=0; i< dims.size();i++)
      output_shape_anchors[i] = dims[i];
    ... 
    
    int size_anchors = dims[1] * dims[2];
    int size_classification = dims[1] * numClassScores_;   // one score per class per anchor
    results_regression = new float[size_anchors];
    results_classification = new float[size_classification];
    results_anchors = new float[size_anchors];  
 
    int size_image_data = channels_ * width_ * height_;
    input_image_ = new float[size_image_data];
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    input_tensor_ = Ort::Value::CreateTensor<float>(memory_info, input_image_, size_image_data, input_shape_.data(), input_shape_.size());
    output_tensor_[0] = Ort::Value::CreateTensor<float>(memory_info, results_regression, size_anchors, 
                                       output_shape_regression.data(), output_shape_regression.size());
    output_tensor_[1] = Ort::Value::CreateTensor<float>(memory_info, results_classification, size_classification,
                                       output_shape_classification.data(), output_shape_classification.size());
    output_tensor_[2] = Ort::Value::CreateTensor<float>(memory_info, results_anchors, size_anchors,
                                       output_shape_anchors.data(), output_shape_anchors.size());
  }
 
...
 
std::vector<std::vector<float>> EfficientDetOnnxRT::infer(cv::Mat& cv_img, float nms_threshold, float score_threshold ) {
    memset(input_image_,0, channels_ * height_ * width_ * sizeof(float));
    processInput(input_image_,cv_img);   
 
    session_.Run(Ort::RunOptions{nullptr}, &input_names[0], &input_tensor_, 1, &output_names[0], &output_tensor_[0], 3); 
    return processOutput(results_regression, results_classification, results_anchors, nms_threshold, score_threshold);
  }
 
...
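A minimal usage sketch of the class above (not from the original article; the model path, class count and thresholds are placeholder assumptions):

#include "EfficientDet.h"
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main() {
    // Hypothetical model path and class count, for illustration only.
    EfficientDetOnnxRT detector("efficientdet-d0.onnx", 90);

    cv::Mat img = cv::imread("test.jpg");
    // infer() fills the input tensor, runs the session and decodes the three
    // output buffers; each returned row is one detection (the exact layout
    // depends on the elided processOutput implementation).
    std::vector<std::vector<float>> dets = detector.infer(img, /*nms_threshold=*/0.5f, /*score_threshold=*/0.3f);
    std::cout << "detections: " << dets.size() << std::endl;
    return 0;
}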

Reference: onnxruntime调用AI模型的python和C++编程 (Python and C++ programming for running AI models with onnxruntime)

2、squeezenet

// https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/CXX_Api_Sample.cpp
// https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_cxx_api.h
#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (int i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

/**
 * @brief Print ONNX tensor data type
 * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
 * @param os
 * @param type
 * @return std::ostream&
 */
std::ostream& operator<<(std::ostream& os,
                         const ONNXTensorElementDataType& type)
{
    switch (type)
    {
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
            os << "undefined";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
            os << "float";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
            os << "uint8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
            os << "int8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
            os << "uint16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
            os << "int16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
            os << "int32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
            os << "int64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
            os << "std::string";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
            os << "bool";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
            os << "float16";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
            os << "double";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
            os << "uint32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
            os << "uint64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
            os << "float real + float imaginary";
            break;
        case ONNXTensorElementDataType::
            ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
            os << "double real + float imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
            os << "bfloat16";
            break;
        default:
            break;
    }

    return os;
}

std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

int main(int argc, char* argv[])
{
    bool useCUDA{true};
    const char* useCUDAFlag = "--use_cuda";
    const char* useCPUFlag = "--use_cpu";
    if (argc == 1)
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) == 0))
    {
        useCUDA = true;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCPUFlag) == 0))
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) != 0))
    {
        useCUDA = false;
    }
    else
    {
        throw std::runtime_error{"Too many arguments."};
    }

    if (useCUDA)
    {
        std::cout << "Inference Execution Provider: CUDA" << std::endl;
    }
    else
    {
        std::cout << "Inference Execution Provider: CPU" << std::endl;
    }

    std::string instanceName{"image-classification-inference"};
    std::string modelFilepath{"../../data/models/squeezenet1.1-7.onnx"};
    std::string imageFilepath{
        "../../data/images/european-bee-eater-2115564_1920.jpg"};
    std::string labelFilepath{"../../data/labels/synset.txt"};

    std::vector<std::string> labels{readLabels(labelFilepath)};

    // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L123
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                 instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);
    if (useCUDA)
    {
        // Using CUDA backend
        // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h#L13
        OrtStatus* status =
            OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
    }

    // Sets graph optimization level
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
    // (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;

    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // cv::Size is (width, height) = (dims[3], dims[2]); keep fx/fy at 0 so
    // INTER_CUBIC is passed as the interpolation flag rather than as fx.
    cv::resize(imageBGR, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)), 0, 0,
               cv::InterpolationFlags::INTER_CUBIC);
    cv::cvtColor(resizedImageBGR, resizedImageRGB,
                 cv::ColorConversionCodes::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    // Normalization per channel
    // Normalization parameters obtained from
    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;
    cv::merge(channels, 3, resizedImage);
    // HWC to CHW
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);

    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    assert(("Output tensor size should equal to the label set size.",
            labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    std::vector<Ort::Value> inputTensors;
    std::vector<Ort::Value> outputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));
    outputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L353
    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);

    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    for (int i = 0; i < labels.size(); i++)
    {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum
              << std::endl;

    // Measure latency
    int numTests{100};
    std::chrono::steady_clock::time_point begin =
        std::chrono::steady_clock::now();
    for (int i = 0; i < numTests; i++)
    {
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
    }
    std::chrono::steady_clock::time_point end =
        std::chrono::steady_clock::now();
    std::cout << "Minimum Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end -
                                                                       begin)
                         .count() /
                     static_cast<float>(numTests)
              << " ms" << std::endl;
}

Reference: https://github.com/leimao/ONNX-Runtime-Inference

3、squeezenet

// Copyright(c) Microsoft Corporation.All rights reserved.
// Licensed under the MIT License.
//

#include <assert.h>
#include <cmath>
#include <cstdio>
#include <vector>
#include <onnxruntime_cxx_api.h>

int main(int argc, char* argv[]) {
  //*************************************************************************
  // initialize environment...one environment per process
  // environment maintains thread pools and other state info
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");

  // initialize session options if needed
  Ort::SessionOptions session_options;
  session_options.SetIntraOpNumThreads(1);

  // If onnxruntime.dll is built with CUDA enabled, we can uncomment out this line to use CUDA for this
  // session (we also need to include cuda_provider_factory.h above which defines it)
  // #include "cuda_provider_factory.h"
  // OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 1);

  // Sets graph optimization level
  // Available levels are
  // ORT_DISABLE_ALL -> To disable all optimizations
  // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
  // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
  // ORT_ENABLE_ALL -> To Enable All possible optimizations
  session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

  //*************************************************************************
  // create session and load model into memory
  // using squeezenet version 1.3
  // URL = https://github.com/onnx/models/tree/master/squeezenet
#ifdef _WIN32
  const wchar_t* model_path = L"squeezenet.onnx";
#else
  const char* model_path = "squeezenet.onnx";
#endif

  printf("Using Onnxruntime C++ API\n");
  Ort::Session session(env, model_path, session_options);

  //*************************************************************************
  // print model input layer (node names, types, shape etc.)
  Ort::AllocatorWithDefaultOptions allocator;

  // print number of model input nodes
  size_t num_input_nodes = session.GetInputCount();
  std::vector<const char*> input_node_names(num_input_nodes);
  std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224}.
                                         // Otherwise need vector<vector<>>

  printf("Number of inputs = %zu\n", num_input_nodes);

  // iterate over all input nodes
  for (int i = 0; i < num_input_nodes; i++) {
    // print input node names
    char* input_name = session.GetInputName(i, allocator);
    printf("Input %d : name=%s\n", i, input_name);
    input_node_names[i] = input_name;

    // print input node types
    Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType type = tensor_info.GetElementType();
    printf("Input %d : type=%d\n", i, type);

    // print input shapes/dims
    input_node_dims = tensor_info.GetShape();
    printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
    for (int j = 0; j < input_node_dims.size(); j++)
      printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
  }

  // Results should be...
  // Number of inputs = 1
  // Input 0 : name = data_0
  // Input 0 : type = 1
  // Input 0 : num_dims = 4
  // Input 0 : dim 0 = 1
  // Input 0 : dim 1 = 3
  // Input 0 : dim 2 = 224
  // Input 0 : dim 3 = 224

  //*************************************************************************
  // Similar operations to get output node information.
  // Use OrtSessionGetOutputCount(), OrtSessionGetOutputName()
  // OrtSessionGetOutputTypeInfo() as shown above.
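
  // (Illustrative sketch, not in the original sample.) The same pattern as the
  // input loop above, applied to the outputs via the C++ wrappers:
  size_t num_output_nodes = session.GetOutputCount();
  for (int i = 0; i < num_output_nodes; i++) {
    char* output_name = session.GetOutputName(i, allocator);
    printf("Output %d : name=%s\n", i, output_name);

    Ort::TypeInfo output_type_info = session.GetOutputTypeInfo(i);
    auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
    std::vector<int64_t> output_dims = output_tensor_info.GetShape();
    printf("Output %d : num_dims=%zu\n", i, output_dims.size());
    for (int j = 0; j < output_dims.size(); j++)
      printf("Output %d : dim %d=%jd\n", i, j, output_dims[j]);
  }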

  //*************************************************************************
  // Score the model using sample data, and inspect values

  size_t input_tensor_size = 224 * 224 * 3;  // simplify ... using known dim values to calculate size
                                             // use OrtGetTensorShapeElementCount() to get official size!

  std::vector<float> input_tensor_values(input_tensor_size);
  std::vector<const char*> output_node_names = {"softmaxout_1"};

  // initialize input data with values in [0.0, 1.0]
  for (unsigned int i = 0; i < input_tensor_size; i++)
    input_tensor_values[i] = (float)i / (input_tensor_size + 1);

  // create input tensor object from data values
  auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
  assert(input_tensor.IsTensor());

  // score model & input tensor, get back output tensor
  auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
  assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());

  // Get pointer to output tensor float values
  float* floatarr = output_tensors.front().GetTensorMutableData<float>();
  assert(std::abs(floatarr[0] - 0.000045) < 1e-6);

  // score the model, and print scores for first 5 classes
  for (int i = 0; i < 5; i++)
    printf("Score for class [%d] =  %f\n", i, floatarr[i]);

  // Results should be as below...
  // Score for class[0] = 0.000045
  // Score for class[1] = 0.003846
  // Score for class[2] = 0.000125
  // Score for class[3] = 0.001180
  // Score for class[4] = 0.001317
  printf("Done!\n");
  return 0;
}
/*
Copyright (C) 2021, Intel Corporation
SPDX-License-Identifier: Apache-2.0
Portions of this software are copyright of their respective authors and released under the MIT license:
- ONNX-Runtime-Inference, Copyright 2020 Lei Mao. For licensing see https://github.com/leimao/ONNX-Runtime-Inference/blob/main/LICENSE.md
*/

#include <onnxruntime_cxx_api.h>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>
#include <stdexcept> // To use runtime_error

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */

template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (int i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

// Function to validate the input image file extension.
bool imageFileExtension(std::string str)
{
  // is empty throw error
  if (str.empty())
    throw std::runtime_error("[ ERROR ] The image File path is empty");

  size_t pos = str.rfind('.');
  if (pos == std::string::npos)
    return false;

  std::string ext = str.substr(pos+1);

  if (ext == "jpg" || ext == "jpeg" || ext == "gif" || ext == "png" || ext == "jfif" || 
        ext == "JPG" || ext == "JPEG" || ext == "GIF" || ext == "PNG" || ext == "JFIF") {
            return true;
  }

  return false;
}

// Function to read the labels from the labelFilepath.
std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

// Function to validate the input model file extension.
bool checkModelExtension(const std::string& filename)
{
    if(filename.empty())
    {
        throw std::runtime_error("[ ERROR ] The Model file path is empty");
    }
    size_t pos = filename.rfind('.');
    if (pos == std::string::npos)
        return false;
    std::string ext = filename.substr(pos+1);
    if (ext == "onnx")
        return true;
    return false;
}

// Function to validate the Label file extension.
bool checkLabelFileExtension(const std::string& filename)
{
    size_t pos = filename.rfind('.');
    if (filename.empty())
    {
        throw std::runtime_error("[ ERROR ] The Label file path is empty");
    }
    if (pos == std::string::npos)
        return false;
    std::string ext = filename.substr(pos+1);
    if (ext == "txt") {
        return true;
    } else {
        return false;
    }
}

//Handling divide by zero
float division(float num, float den){
   if (den == 0) {
      throw std::runtime_error("[ ERROR ] Math error: Attempted to divide by Zero\n");
   }
   return (num / den);
}

void printHelp() {
    std::cout << "To run the model, use the following command:\n";
    std::cout << "Example: ./run_squeezenet --use_openvino <path_to_the_model> <path_to_the_image> <path_to_the_classes_file>" << std::endl;
    std::cout << "\n To Run using OpenVINO EP.\nExample: ./run_squeezenet --use_openvino squeezenet1.1-7.onnx demo.jpeg synset.txt \n" << std::endl;
    std::cout << "\n To Run on Default CPU.\n Example: ./run_squeezenet --use_cpu squeezenet1.1-7.onnx demo.jpeg synset.txt \n" << std::endl;
}

int main(int argc, char* argv[])
{
    bool useOPENVINO{true};
    const char* useOPENVINOFlag = "--use_openvino";
    const char* useCPUFlag = "--use_cpu";

    if(argc == 2) {
        std::string option = argv[1];
        if (option == "--help" || option == "-help" || option == "--h" || option == "-h") {
            printHelp();
        }
        return 0;
    } else if(argc != 5) {
        std::cout << "[ ERROR ] you have used the wrong command to run your program." << std::endl;
        printHelp();
        return 0;
    } else if (strcmp(argv[1], useOPENVINOFlag) == 0) {
        useOPENVINO = true;
    } else if (strcmp(argv[1], useCPUFlag) == 0) {
        useOPENVINO = false;
    }

    if (useOPENVINO)
    {
        std::cout << "Inference Execution Provider: OPENVINO" << std::endl;
    }
    else
    {
        std::cout << "Inference Execution Provider: CPU" << std::endl;
    }

    std::string instanceName{"image-classification-inference"};

    std::string modelFilepath = argv[2]; // .onnx file

    //validate ModelFilePath
    if(!checkModelExtension(modelFilepath)) {
        throw std::runtime_error("[ ERROR ] The ModelFilepath is not correct. Make sure you are setting the path to an onnx model file (.onnx)");
    }
    std::string imageFilepath = argv[3];

    // Validate ImageFilePath
    if(!imageFileExtension(imageFilepath)) {
        throw std::runtime_error("[ ERROR ] The imageFilepath doesn't have correct image extension. Choose from jpeg, jpg, gif, png, PNG, jfif");
    }
    std::ifstream f(imageFilepath.c_str());
    if(!f.good()) {
        throw std::runtime_error("[ ERROR ] The imageFilepath is not set correctly or doesn't exist");
    }

    // Validate LabelFilePath
    std::string labelFilepath = argv[4];
    if(!checkLabelFileExtension(labelFilepath)) {
        throw std::runtime_error("[ ERROR ] The LabelFilepath is not set correctly and the labels file should end with extension .txt");
    }

    std::vector<std::string> labels{readLabels(labelFilepath)};

    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                 instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);

    //Appending OpenVINO Execution Provider API
    if (useOPENVINO) {
        // Using OPENVINO backend
        OrtOpenVINOProviderOptions options;
        options.device_type = "CPU_FP32"; //Other options are: GPU_FP32, GPU_FP16, MYRIAD_FP16
        std::cout << "OpenVINO device type is set to: " << options.device_type << std::endl;
        sessionOptions.AppendExecutionProvider_OpenVINO(options);
    }
    
    // Sets graph optimization level
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
    // (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_DISABLE_ALL);

    //Creation: The Ort::Session is created here
    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;
    //pre-processing the Image
    // step 1: Read an image in HWC BGR UINT8 format.
    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);

    // step 2: Resize the image.
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // cv::Size is (width, height) = (dims[3], dims[2]); keep fx/fy at 0 so
    // INTER_CUBIC is passed as the interpolation flag rather than as fx.
    cv::resize(imageBGR, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)), 0, 0,
               cv::InterpolationFlags::INTER_CUBIC);

    // step 3: Convert the image to HWC RGB UINT8 format.
    cv::cvtColor(resizedImageBGR, resizedImageRGB,
                 cv::ColorConversionCodes::COLOR_BGR2RGB);
    // step 4: Convert the image to HWC RGB float format by dividing each pixel by 255.
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    // step 5: Split the RGB channels from the image.   
    cv::Mat channels[3];
    cv::split(resizedImage, channels);

    //step 6: Normalize each channel.
    // Normalization per channel
    // Normalization parameters obtained from
    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;

    //step 7: Merge the RGB channels back to the image.
    cv::merge(channels, 3, resizedImage);

    // step 8: Convert the image to CHW RGB float format.
    // HWC to CHW
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);


    //Run Inference

    /* To run inference using ONNX Runtime, the user is responsible for creating and managing the 
    input and output buffers. These buffers could be created and managed via std::vector.
    The linear-format input data should be copied to the buffer for ONNX Runtime inference. */

    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    assert(("Output tensor size should equal to the label set size.",
            labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);


    /* Once the buffers were created, they would be used for creating instances of Ort::Value 
    which is the tensor format for ONNX Runtime. There could be multiple inputs for a neural network, 
    so we have to prepare an array of Ort::Value instances for inputs and outputs respectively even if 
    we only have one input and one output. */

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    std::vector<Ort::Value> inputTensors;
    std::vector<Ort::Value> outputTensors;

    /*
    Creating ONNX Runtime inference sessions, querying input and output names, 
    dimensions, and types are trivial.
    Setup inputs & outputs: The input & output tensors are created here. */

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));
    outputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    /* To run inference, we provide the run options, an array of input names corresponding to the 
    inputs in the input tensor, an array of input tensor, number of inputs, an array of output names 
    corresponding to the the outputs in the output tensor, an array of output tensor, number of outputs. */

    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);

    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    /* The inference result could be found in the buffer for the output tensors, 
    which are usually the buffer from std::vector instances. */
    for (int i = 0; i < labels.size(); i++) {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    float result;
    try {
      result = division(std::exp(maxActivation), expSum);
      std::cout << "Uncalibrated Confidence: " << result << std::endl;
    }
    catch (std::runtime_error& e) {
      std::cout << "Exception occurred" << std::endl << e.what();
    }

    // Measure latency
    int numTests{100};
    std::chrono::steady_clock::time_point begin =
        std::chrono::steady_clock::now();

    //Run: Running the session is done in the Run() method:
    for (int i = 0; i < numTests; i++) {
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
    }
    std::chrono::steady_clock::time_point end =
        std::chrono::steady_clock::now();
    std::cout << "Minimum Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
              << " ms" << std::endl;
    return 0;
}

Reference: microsoft/onnxruntime

4、squeezenet

#include <cuda_provider_factory.h>
#include <onnxruntime_cxx_api.h>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

#define ONNX_MODEL_PATH "../../data/squeezenet1.1-7.onnx"
#define IMG_PATH "../../data/european-bee-eater-2115564_1920.jpg"
#define LABEL_PATH "../../data/synset.txt"

template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (int i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

/**
 * @brief Print ONNX tensor data type
 * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
 * @param os
 * @param type
 * @return std::ostream&
 */
std::ostream& operator<<(std::ostream& os,
                         const ONNXTensorElementDataType& type)
{
    switch (type)
    {
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
            os << "undefined";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
            os << "float";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
            os << "uint8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
            os << "int8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
            os << "uint16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
            os << "int16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
            os << "int32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
            os << "int64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
            os << "std::string";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
            os << "bool";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
            os << "float16";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
            os << "double";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
            os << "uint32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
            os << "uint64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
            os << "float real + float imaginary";
            break;
        case ONNXTensorElementDataType::
            ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
            os << "double real + float imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
            os << "bfloat16";
            break;
        default:
            break;
    }

    return os;
}

cv::Mat normalization(std::string imageFilepath, std::vector<int64_t> inputDims){
    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // cv::Size is (width, height) = (dims[3], dims[2]); keep fx/fy at 0 so
    // INTER_CUBIC is passed as the interpolation flag rather than as fx.
    cv::resize(imageBGR, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)), 0, 0,
               cv::InterpolationFlags::INTER_CUBIC);
    cv::cvtColor(resizedImageBGR, resizedImageRGB,
                 cv::ColorConversionCodes::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);

    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    // Normalization per channel
    // Normalization parameters obtained from
    // https://github.com/onnx/models/tree/master/vision/classification/squeezenet
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;
    cv::merge(channels, 3, resizedImage);
    // HWC to CHW
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);
    return preprocessedImage;
}


std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

int main(int argc, char* argv[])
{
    bool useCUDA{true};
    const char* useCUDAFlag = "--use_cuda";
    const char* useCPUFlag = "--use_cpu";
    if (argc == 1)
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) == 0))
    {
        useCUDA = true;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCPUFlag) == 0))
    {
        useCUDA = false;
    }
    else if ((argc == 2) && (strcmp(argv[1], useCUDAFlag) != 0))
    {
        useCUDA = false;
    }
    else
    {
        throw std::runtime_error{"Too many arguments."};
    }

    if (useCUDA)
    {
        std::cout << "Inference Execution Provider: CUDA" << std::endl;
    }
    else
    {
        std::cout << "Inference Execution Provider: CPU" << std::endl;
    }

    // Define data and model
    std::string instanceName{"image-classification-inference"};
    std::string modelFilepath{ONNX_MODEL_PATH};
    std::string imageFilepath{IMG_PATH};
    std::string labelFilepath{LABEL_PATH};

    std::vector<std::string> labels{readLabels(labelFilepath)};

   
    
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                 instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);
    if (useCUDA)
    {
        // Using CUDA backend
        // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h#L13
        OrtStatus* status =
            OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, 0);
    }

    // Sets graph optimization level
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node
    // removals) ORT_ENABLE_EXTENDED -> To enable extended optimizations
    // (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(
        GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    Ort::Session session(env, modelFilepath.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;

    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;

    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;

    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;
    
    // Normalize image
    cv::Mat preprocessedImage = normalization(imageFilepath, inputDims);

    
    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    assert(("Output tensor size should equal to the label set size.",
            labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    std::vector<Ort::Value> inputTensors;
    std::vector<Ort::Value> outputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));
    outputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    // https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L353
    session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);

    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    for (int i = 0; i < labels.size(); i++)
    {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum
              << std::endl;

    // Measure latency
    int numTests{100};
    std::chrono::steady_clock::time_point begin =
        std::chrono::steady_clock::now();
    for (int i = 0; i < numTests; i++)
    {
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
    }
    std::chrono::steady_clock::time_point end =
        std::chrono::steady_clock::now();
    std::cout << "Minimum Inference Latency: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end -
                                                                       begin)
                         .count() /
                     static_cast<float>(numTests)
              << " ms" << std::endl;
}

Reference: madara-tribe/cpp_sample_onnx_inference

5、resnet101

#include <iostream>
#include <string>
#include <vector>
#include <random>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
//#include "segmentation_onnx.h"
//#include <onnxruntime_cxx_inline.h>
//#include <cuda_provider_factory.h>
//#include <cpu_provider_factory.h>
//#include <onnxruntime_cxx_api.h>

#include <onnxruntime/core/session/onnxruntime_cxx_api.h>
#include "onnxruntime/core/providers/cpu/cpu_provider_factory.h"
#include "onnxruntime/core/providers/cuda/cuda_provider_factory.h"
#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h"


#include <chrono>

static constexpr const int width_ = 640;
static constexpr const int height_ = 480;
static constexpr const int channel = 4;

std::array<float, 1 * width_ * height_ * channel> input_image_{};
//std::array<float, 1 * width_ * height_ * 1>              results_{};
cv::Mat results_ = cv::Mat::zeros(height_, width_, CV_32F);

#ifdef _WIN32
//const wchar_t* model_path = L"4channels384_640.onnx";
//const wchar_t* model_path = L"D:/pengt/code/Cplus/onnx_model/resnet101_21_384x640.onnx";
const wchar_t* model_path = L"D:/pengt/code/Cplus/onnx_model/resnet101_21_480x640.onnx";

#else
const char* model_path = "4channels384_640.onnx";
#endif


#define USE_CUDA

class ONNX_Model
{
public:
#ifdef _WIN32
    ONNX_Model(const wchar_t* model_path)
        : m_env{ ORT_LOGGING_LEVEL_ERROR, "" },
        m_session{ nullptr },
        m_sess_opts{},
        m_mem_info{ Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault) }
#else
    ONNX_Model(const char* model_path)
        : m_env{ ORT_LOGGING_LEVEL_ERROR, "" },
        m_session{ nullptr },
        m_sess_opts{},
        m_mem_info{ Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault) }
#endif
    {
         //option
         /*  if (true)
            m_sess_opts.EnableCpuMemArena();
        else
            m_sess_opts.DisableCpuMemArena();*/

        //end
        
        /*
        // DirectML
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_DML(m_sess_opts, 0));
        m_sess_opts.DisableMemPattern();
        m_sess_opts.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
        m_session = Ort::Session(m_env, model_path, m_sess_opts);
        */

        
        // CPU
       // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(m_sess_opts, 0));
       // m_sess_opts.EnableMemPattern();
       // m_sess_opts.SetIntraOpNumThreads(8);
        //m_sess_opts.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
       // m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
       // m_session = Ort::Session(m_env, model_path, m_sess_opts);
        //tensorrt
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(m_sess_opts,0));
        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(m_sess_opts, 0));

        //m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);
        m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

        //m_sess_opts.SetOptimizedModelFilePath(out_optimize_path);  //save optimize

        m_session = Ort::Session(m_env, model_path, m_sess_opts);
        //m_session = Ort::Session(m_env, out_optimize_path, m_sess_opts);
        //end tensorrt
        

         // CUDA
        //Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(m_sess_opts, 0));
        //m_sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
        //m_session = Ort::Session(m_env, model_path, m_sess_opts);
       
        
        //
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
        input_tensor_ = Ort::Value::CreateTensor<float>(memory_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
        //output_tensor_ = Ort::Value::CreateTensor<float>(memory_info, results_.data(), results_.size(), output_shape_.data(), output_shape_.size());
        output_tensor_ = Ort::Value::CreateTensor<float>(memory_info, (float*)results_.data, height_ * width_, output_shape_.data(), output_shape_.size());

    }
    void  Run()
    {
        m_session.Run(Ort::RunOptions{ nullptr }, input_names.data(), &input_tensor_, 1, output_names.data(), &output_tensor_, 1);
        return;
    }

    std::vector<int64_t> get_input_shape_from_session()
    {
        Ort::TypeInfo info = m_session.GetInputTypeInfo(0);
        auto tensor_info = info.GetTensorTypeAndShapeInfo();
        size_t dim_count = tensor_info.GetDimensionsCount();
        std::vector<int64_t> dims(dim_count);
        tensor_info.GetDimensions(dims.data(), dims.size());
        return dims;
    }
private:
    Ort::Env m_env;
    Ort::Session m_session;
    Ort::SessionOptions m_sess_opts;
    Ort::MemoryInfo m_mem_info;
    //std::vector<const char*> input_names{ "0" };
    //std::vector<const char*> output_names{ "293" };  //Unet

    std::vector<const char*> input_names{ "input.1" };
    std::vector<const char*> output_names{ "1207" };  //resnet101
  
    //std::vector<const char*> input_names{  "input.1" };
    //std::vector<const char*> output_names{  "1683" };  //resnet152

    Ort::Value              input_tensor_{ nullptr };
    std::array<int64_t, 4>  input_shape_{ 1, channel,  height_,width_ };
    Ort::Value              output_tensor_{ nullptr };
    std::array<int64_t, 4>  output_shape_{ 1, 1, height_,width_ };

};


// Data preprocessing expected by the model (numpy-style pseudocode):

//rgbm = rgbm / 255.
//mean = [0.485, 0.456, 0.406, 0]
//std = [0.229, 0.224, 0.225, 1]
//rgbm = rgbm - mean
//rgbm = rgbm / std
//x.transpose(2, 0, 1).astype('float32')

#if 0
void fill_data(cv::Mat input_img, cv::Mat pre_mask, float* output, const int index = 0)
{
    cv::Mat dst_img;
    cv::Mat dst_pre;
    cv::Scalar mean = cv::Scalar(0.485, 0.456, 0.406);
    cv::Scalar std = cv::Scalar(0.229, 0.224, 0.225);
    float scale = 0.00392;
    //cv::Scalar mean_ = cv::Scalar(109.125, 102.6, 91.35);
    //cv::Scalar std_ = cv::Scalar(0.0171, 0.01884, 0.01975);
    //if (images.depth() == CV_8U && ddepth == CV_32F)
    //    images.convertTo(images[i], CV_32F);
    //std::swap(mean[0], mean[2]);
    input_img.convertTo(dst_img, CV_32F, scale);
    pre_mask.convertTo(dst_pre, CV_32F, scale);

    dst_img -= mean;
    dst_img /= std;

    //dst_img -= mean_;
    //dst_img *= std_;
    //std::vector<cv::Mat> channels;
    //split(input_img, channels); // split channels
    //channels.push_back(pre_mask);
    //cv::convertTo(img_float, CV_32F, 1.0 / 255);

    int row = dst_img.rows;
    int col = dst_img.cols;

    //cv::Scalar rgb_mean = cv::mean(dst);
    //std::cout<< (dst_img.ptr<float>(0, 214)[0]) <<std::endl;


    for (int c = 0; c < 3; c++) {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                //output[c * row * col + i * col + j] = (dst_img.ptr<uchar>(i)[j * 3 + c]);
                //std::cout << "i and j :"<<i<<","<<j<<"="<<(dst_img.ptr<float>(i,j)[c]) << std::endl;
                output[c * row * col + i * col + j] = (dst_img.ptr<float>(i, j)[c]);
            }
        }
    }
    if (index % 20 == 0)
    {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                //output[4 * row * col + i * col + j] = (dst_pre.ptr<uchar>(i)[j]);
                output[3 * row * col + i * col + j] = (dst_pre.ptr<float>(i, j)[0]);
            }
        }
    }

    return;
}
#else
void fill_data(cv::Mat input_img, cv::Mat pre_mask, float* output, const int index = 0)
{
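    // Folded form of the preprocessing listed in the comment block above:
    // out = (pixel/255 - m)/s  ==  pixel * (1/(255*s_c)) - m_c/s_c.
    // "std" below holds 1/(255*[0.229, 0.224, 0.225]), "allmul" holds mean[c]*std[c],
    // and "mean" (255*[0.485, 0.456, 0.406]) is kept only for reference.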
    float mean[3] = { 123.675, 116.28, 103.53 };
    float std[3] = { 0.0171, 0.0175, 0.0174 };
    float allmul[3] = { 2.1145,2.0349,1.8014 };
    float scale = 1;
    float* dst_pre = pre_mask.ptr<float>(0, 0);
    int row = input_img.rows;
    int col = input_img.cols;
    int alllen = row * col;
    //double timeStart = (double)cv::getTickCount();
    //HWC -> CHW
    float* inbuf = output;
    for (int c = 0; c < 3; c++) {
        uchar* img_data = input_img.data;
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                *inbuf = img_data[c] * std[c] - allmul[c];//(dst_img.ptr<float>(i, j)[c]);
                img_data += 3;
                inbuf++;
            }
        }
    }

    //double circle = ((double)cv::getTickCount() - timeStart) / cv::getTickFrequency();
    //std::cout << "circle time  :" << circle << " sec";
    memcpy(inbuf, dst_pre, sizeof(float) * alllen);

    return;
}
#endif

using namespace cv;
std::vector<Vec3b> colors;

void colorizeSegmentation(const Mat& score, Mat& segm)
{
    const int rows = score.size[2];
    const int cols = score.size[3];
    const int chns = score.size[1];

    if (colors.empty()) {
        // Generate colors.
        colors.push_back(Vec3b());
        for (int i = 1; i < chns; ++i) {
            Vec3b color;
            for (int j = 0; j < 3; ++j)
                color[j] = (colors[i - 1][j] + rand() % 256) / 2;
            colors.push_back(color);
        }
    }
    else if (chns != (int)colors.size()) {
        CV_Error(Error::StsError, format("Number of output classes does not match "
            "number of colors (%d != %zu)", chns, colors.size()));
    }

    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
    Mat maxVal(rows, cols, CV_32FC1, score.data);
    for (int ch = 1; ch < chns; ch++) {
        for (int row = 0; row < rows; row++) {
            const float* ptrScore = score.ptr<float>(0, ch, row);
            uint8_t* ptrMaxCl = maxCl.ptr<uint8_t>(row);
            float* ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++) {
                if (ptrScore[col] > ptrMaxVal[col]) {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = (uchar)ch;
                }
            }
        }
    }

    segm.create(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++) {
        const uchar* ptrMaxCl = maxCl.ptr<uchar>(row);
        Vec3b* ptrSegm = segm.ptr<Vec3b>(row);
        for (int col = 0; col < cols; col++) {
            ptrSegm[col] = colors[ptrMaxCl[col]];
        }
    }
}

// background image, foreground frame, mask
cv::Mat replace_and_blend(cv::Mat bkimg, cv::Mat& frame, cv::Mat& mask)
{
    cv::Mat result = cv::Mat::zeros(frame.size(), frame.type());
    int h = frame.rows;
    int w = frame.cols;
    int m = 0;
    double wt = 0;

    int b = 0, g = 0, r = 0;
    int b1 = 0, g1 = 0, r1 = 0;
    int b2 = 0, g2 = 0, r2 = 0;

    for (int row = 0; row < h; row++)
    {
        uchar* current = frame.ptr<uchar>(row);
        uchar* bgrow = bkimg.ptr<uchar>(row);
        uchar* maskrow = mask.ptr<uchar>(row);
        uchar* targetrow = result.ptr<uchar>(row);

        for (int col = 0; col < w; col++)
        {
            m = *maskrow++;
            if (m == 0)   // background pixel: use the background image data
            {
                *targetrow++ = *bgrow++;
                *targetrow++ = *bgrow++;
                *targetrow++ = *bgrow++;
                current += 3;
            }
            else if (m == 255) // foreground pixel: keep the original frame data
            {
                *targetrow++ = *current++;
                *targetrow++ = *current++;
                *targetrow++ = *current++;
                bgrow += 3;
            }
            else // transition band left by the morphological smoothing: blend the two with per-pixel weights
            {
                b1 = *bgrow++;
                g1 = *bgrow++;
                r1 = *bgrow++;

                b2 = *current++;
                g2 = *current++;
                r2 = *current++;

                wt = m / 255.0;

                b = b2 * wt + b1 * (1 - wt);
                g = g2 * wt + g1 * (1 - wt);
                r = r2 * wt + r1 * (1 - wt);

                *targetrow++ = b;
                *targetrow++ = g;
                *targetrow++ = r;

            }
        }
    }
    return result;  // return the blended result
}


#if 0
int main(void)
{
    ONNX_Model model;
    std::vector<int64_t> dims = model.get_input_shape_from_session();
    std::cout << "Input Shape: (";
    std::cout << dims[0] << ", " << dims[1] << ", " << dims[2] << ", " << dims[3] << ")" << std::endl;
    int inputwidth = 640;
    int inputheight = 384;
    cv::Mat pre_mask = cv::Mat::zeros(inputheight, inputwidth, CV_8UC1);  //height  width
    cv::Mat frame, image0;
    frame = cv::imread("D:/pengt/segmetation/test_pic/1.png");
    cv::resize(frame, frame, cv::Size(inputwidth, inputheight));  //width height
    cv::cvtColor(frame, image0, cv::COLOR_BGR2RGB);

    double timeStart = (double)cv::getTickCount();
    float* output = input_image_.data();

    std::fill(input_image_.begin(), input_image_.end(), 0.f);
    fill_data(image0, pre_mask, output);

    model.Run();
    double nTime = ((double)cv::getTickCount() - timeStart) / cv::getTickFrequency();
    std::cout << "running time: " << nTime << " sec\n" << std::endl;

    cv::Mat segm;
    //colorizeSegmentation(results_, segm);
    cv::threshold(results_, segm, 0.5, 200, cv::THRESH_BINARY);

    cv::imshow("mask", segm);
    cv::imshow("pre_image", frame);
    cv::waitKey(0);

    return 0;
}


#else
int main(void)
{
    ONNX_Model model(model_path);
    std::vector<int64_t> dims = model.get_input_shape_from_session();
    std::cout << "Input Shape: (";
    std::cout << dims[0] << ", " << dims[1] << ", " << dims[2] << ", " << dims[3] << ")" << std::endl;
    int inputwidth = width_;
    int inputheight = height_;
    cv::Mat pre_mask = cv::Mat::zeros(height_, width_, CV_32F);//cv::Mat::zeros(inputheight, inputwidth, CV_8UC1);  //height  width
    cv::Mat frame, image0;
    cv::Mat backimg = cv::imread("0.jpg");
    cv::resize(backimg, backimg, cv::Size(inputwidth, inputheight));

    cv::VideoCapture capture(0);
    capture.set(cv::CAP_PROP_FRAME_WIDTH, 640);

    capture.set(cv::CAP_PROP_FRAME_HEIGHT, 360);
    int index = 0;
   /// object_rect res_area;

    // To rotate 90° clockwise:         transpose(src, tmp) + flip(tmp, dst, 1)

    // To rotate 90° counter-clockwise: transpose(src, tmp) + flip(tmp, dst, 0)

    while (true)
    {
        index += 1;
        capture >> frame;

        //cv::transpose(frame, rotef);
        //cv::Mat rodst;
        //cv::rotate(frame, rodst, 0); // rotate 90° clockwise
        //cv::rotate(frame, rodst, 2); // rotate 270° clockwise (90° counter-clockwise)


         cv::Mat sizeFrame;
        double timeStart = (double)cv::getTickCount();
        //resize_uniform(frame, sizeFrame, cv::Size(inputwidth, inputheight), res_area);

        cv::resize(frame, sizeFrame, cv::Size(inputwidth, inputheight));  //width height
        cv::cvtColor(sizeFrame, image0, cv::COLOR_BGR2RGB);


        float* output = input_image_.data();

        // std::fill(input_image_.begin(), input_image_.end(), 0.f);  // temporarily removed
        fill_data(image0, pre_mask, output, index);

        double midpredict = (double)cv::getTickCount();

        double postTime = (midpredict - timeStart) / cv::getTickFrequency();
        std::cout << "  pre-process time: " << postTime << " sec";

        model.Run();
        double stpreTime = (double)cv::getTickCount();
        double preTime = (stpreTime - midpredict) / cv::getTickFrequency();
        std::cout << "  predict time  :" << preTime << "sec";


        cv::Mat segm;
        const int inputMaskValue = 1;
        cv::threshold(results_, segm, 0.5, 1, cv::THRESH_BINARY);

        // contour extraction (optional, commented out below)
       // cv::Mat binary = segm * 255;

       // //cv::threshold(segm * 255, binary, 127, 255, cv::THRESH_BINARY);
       // cv::Mat ubinary;
       // binary.convertTo(ubinary, CV_8U);
       ///* cv::Canny(ubinary, ubinary, 60, 255, 3); // Canny could replace all of this
       // cv::imshow("canny", ubinary);*/
       // std::vector<std::vector<cv::Point>> contours;
       // std::vector<cv::Vec4i> hierarchy;

       // cv::findContours(ubinary, contours, hierarchy, cv::RETR_TREE, cv::CHAIN_APPROX_SIMPLE, cv::Point());
       // cv::Mat imageContours = cv::Mat::zeros(ubinary.size(), CV_8UC1);
       // for (int i = 0; i < contours.size(); i++)
       // {
       //     cv::drawContours(imageContours, contours, i, cv::Scalar(255), 1, 8, hierarchy);
       // }

       // double preTime = ((double)cv::getTickCount() - midpredict) / cv::getTickFrequency();

        //std::cout << "predict time  :" << preTime << "sec\n" << std::endl;
        //cv::imshow("edge", imageContours);

        /// 


        segm.copyTo(pre_mask);
        pre_mask = 0.7 * pre_mask;


        cv::Mat ucharSegem;
        segm = segm * 255;
        segm.convertTo(ucharSegem, CV_8U);

        ///
         // morphological processing to smooth the mask
        cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
        cv::Mat dstmask;
        cv::morphologyEx(ucharSegem, dstmask, cv::MORPH_OPEN, kernel);
        //cv::imshow("mask1", dstmask);
        // Gaussian blur for smoother, nicer-looking edges
        cv::GaussianBlur(dstmask, dstmask, cv::Size(3, 3), 0, 0);
        //cv::imshow("mask2", dstmask);
        cv::Mat dst = replace_and_blend(backimg, sizeFrame, dstmask);

        //Mat resultdst;
        //crop_effect_area(dst, resultdst, frame.size(), res_area);

        double lastTime = ((double)cv::getTickCount() - stpreTime) / cv::getTickFrequency();

        std::cout << "  post-process time: " << lastTime << " sec\n" << std::endl;

        cv::imshow("mask", ucharSegem);
       // cv::imshow("resultdst", resultdst);
        cv::imshow("dst", dst);
        //cv::Mat dst = cv::addWeighted(img1, 0.7, img2, 0.3, 0);
        //cv::imshow("mask", segm);

        //crop_effect_area(Mat & uniform_scaled, Mat & dst, Size ori_size, object_rect effect_area);

        //cv::imshow("pre_image", frame);
        cv::waitKey(1);

    }

    return 0;
}


#endif

Reference: ptklx/onnxruntime_segment

6、CenterNet_onnxruntime

#include <core/session/onnxruntime_cxx_api.h>
#include <core/providers/cuda/cuda_provider_factory.h>
#include <core/session/onnxruntime_c_api.h>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <sys/time.h>
#include <math.h>
 
using namespace cv;
using namespace std;
 

 
#define PI 3.14159265358979323846
 
struct Box
{
    float x1;
    float y1;
    float x2;
    float y2;
    float ang;
};
 
struct Detection
{
    Box bbox;
    int classId;
    float prob;
};

float Logist(float data)
{
    return 1. / (1. + exp(-data));
}
 
void preProcess(cv::Mat &img, float *output)
{
    int input_w = 512;
    int input_h = 512;
    float scale = cv::min(float(input_w) / img.cols, float(input_h) / img.rows);
    auto scaleSize = cv::Size(img.cols * scale, img.rows * scale);
 
    cv::Mat resized;
    cv::resize(img, resized, scaleSize, 0, 0);
    cv::Mat cropped = cv::Mat::zeros(input_h, input_w, CV_8UC3);
    cv::Rect rect((input_w - scaleSize.width) / 2, (input_h - scaleSize.height) / 2, scaleSize.width, scaleSize.height);
 
    resized.copyTo(cropped(rect));
    // imwrite("img_process.png", cropped); 
 
    constexpr static float mean[] = {0.5194416012442385, 0.5378052387430711, 0.533462090585746};
    constexpr static float std[] = {0.3001546018824507, 0.28620901391179554, 0.3014112676161966};
    int row = 512;
    int col = 512;
    for (int c = 0; c < 3; c++)
    {
        for (int i = 0; i < row; i++)
        {
            for (int j = 0; j < col; j++)
            {
                float pix = cropped.ptr<uchar>(i)[j * 3 + c];
                output[c * row * col + i * col + j] = (pix / 255. - mean[c]) / std[c];
            }
        }
    }
}
 
void postProcess(const float *hm, const float *wh, const float *ang, const float *reg,
                vector<Detection> &result, const int w, const int h, const int classes,
                const int kernel_size,const float visthresh)
{
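    // Note: a cell of the hm map is kept as a detection only if sigmoid(hm) exceeds
    // visthresh AND it is the maximum within its kernel_size x kernel_size window,
    // which emulates CenterNet's max-pool style NMS on the heatmap.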
    int flag = 0;
    for (int idx = 0; idx < w * h * classes; idx++)
    {
        if (idx >= w * h * classes)
            return;
        int padding = (kernel_size - 1) / 2; //1
        int offset = -padding;               //-1
        int stride = w * h;                  //128*128
        int grid_x = idx % w;                // x coordinate (column)
        int grid_y = (idx / w) % h;          // y coordinate (row)
        int cls = idx / w / h;               // class index
        int l, m;
        int reg_index = idx - cls * stride; // position within one feature map, in [0, 128*128)
        float c_x, c_y;
        float objProb = Logist(hm[idx]);
        flag += 1;
 
        if (objProb > visthresh)
        {
            float max = -1;
            int max_index = 0;
            for (l = 0; l < kernel_size; ++l)
                for (m = 0; m < kernel_size; ++m)
                {
                    int cur_x = offset + l + grid_x;
                    int cur_y = offset + m + grid_y;
                    int cur_index = cur_y * w + cur_x + stride * cls;
                    int valid = (cur_x >= 0 && cur_x < w && cur_y >= 0 && cur_y < h);
                    float val = (valid != 0) ? Logist(hm[cur_index]) : -1;
                    max_index = (val > max) ? cur_index : max_index;
                    max = (val > max) ? val : max;
                }
 
            if (idx == max_index)
            {
                Detection det;
                c_x = grid_x + reg[reg_index];
                c_y = grid_y + reg[reg_index + stride];
                float angle = ang[reg_index];
 
                det.bbox.x1 = (c_x - wh[reg_index] / 2) * 4;
                det.bbox.y1 = (c_y - wh[reg_index + stride] / 2) * 4;
                det.bbox.x2 = (c_x + wh[reg_index] / 2) * 4;
                det.bbox.y2 = (c_y + wh[reg_index + stride] / 2) * 4;
                det.bbox.ang = angle;
                det.classId = cls;
                det.prob = objProb;
                result.push_back(det);
            }
        }
    }
}
 
void resultCorrect(std::vector<Detection> &result, const cv::Mat &img)
{
    int input_w = 512;
    int input_h = 512;
    float scale = min(float(input_w) / img.cols, float(input_h) / img.rows);
    float dx = (input_w - scale * img.cols) / 2;
    float dy = (input_h - scale * img.rows) / 2;
    for (auto &item : result)
    {
        float x1 = (item.bbox.x1 - dx) / scale;
        float y1 = (item.bbox.y1 - dy) / scale;
        float x2 = (item.bbox.x2 - dx) / scale;
        float y2 = (item.bbox.y2 - dy) / scale;
        x1 = (x1 > 0) ? x1 : 0;
        y1 = (y1 > 0) ? y1 : 0;
        x2 = (x2 < img.cols) ? x2 : img.cols - 1;
        y2 = (y2 < img.rows) ? y2 : img.rows - 1;
        item.bbox.x1 = x1;
        item.bbox.y1 = y1;
        item.bbox.x2 = x2;
        item.bbox.y2 = y2;
    }
}

void draw(const std::vector<Detection> &result, cv::Mat &img)
{
    for (const auto &item : result)
    {
        float ang = item.bbox.ang;
        float cx = (item.bbox.x1 + item.bbox.x2) / 2;
        float cy = (item.bbox.y1 + item.bbox.y2) / 2;
        float height = (item.bbox.x2 - item.bbox.x1);
        float width = (item.bbox.y2 - item.bbox.y1);
        float anglePi = ang / 180 * PI;
        anglePi = anglePi < PI ? anglePi : anglePi - PI;
        float cosA = cos(anglePi);
        float sinA = sin(anglePi);
        float x1 = cx - 0.5 * width;
        float y1 = cy - 0.5 * height;
 
        float x0 = cx + 0.5 * width;
        float y0 = y1;
 
        float x2 = x1;
        float y2 = cy + 0.5 * height;
 
        float x3 = x0;
        float y3 = y2;
 
        int x0n = floor((x0 - cx) * cosA - (y0 - cy) * sinA + cx);
        int y0n = floor((x0 - cx) * sinA + (y0 - cy) * cosA + cy);
 
        int x1n = floor((x1 - cx) * cosA - (y1 - cy) * sinA + cx);
        int y1n = floor((x1 - cx) * sinA + (y1 - cy) * cosA + cy);
 
        int x2n = floor((x2 - cx) * cosA - (y2 - cy) * sinA + cx);
        int y2n = floor((x2 - cx) * sinA + (y2 - cy) * cosA + cy);
 
        int x3n = floor((x3 - cx) * cosA - (y3 - cy) * sinA + cx);
        int y3n = floor((x3 - cx) * sinA + (y3 - cy) * cosA + cy);
 
        cv::line(img, cv::Point(x0n, y0n), cv::Point(x1n, y1n), cv::Scalar(0, 0, 255), 3, 8, 0);
        cv::line(img, cv::Point(x1n, y1n), cv::Point(x2n, y2n), cv::Scalar(255, 0, 0), 3, 8, 0);
        cv::line(img, cv::Point(x2n, y2n), cv::Point(x3n, y3n), cv::Scalar(0, 0, 255), 3, 8, 0);
        cv::line(img, cv::Point(x3n, y3n), cv::Point(x0n, y0n), cv::Scalar(255, 0, 0), 3, 8, 0);
    }
}
 
int main(int argc, const char** argv)
{
    if (argc !=3)
	{
		std::cout << "you should input: \n./predict your_model_path//your_model.onnx your_img_path//your_img.jpg " << std::endl;
		return -1;
	}
	std::string model_path = argv[1];
	std::string image_file = argv[2];
    // 1. Load the model
    Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "R-CenterNet"};
    Ort::SessionOptions session_option;
    session_option.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    Ort::Session session_{env, model_path.c_str(), session_option};

    // 2. Define the input/output node names. Inspect the model with Netron; here they are 0, 512, 515, 518 and 521, corresponding to the input image and the hm, wh, ang, reg outputs.
    std::vector<const char *> input_names = {"0"};
    const char *const output_names[] = {"512", "515", "518", "521"};
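    // Sketch (not in the original): the same names can also be queried at runtime
    // instead of reading them off Netron, using the same API as the other samples:
    //   Ort::AllocatorWithDefaultOptions alloc;
    //   char* in0  = session_.GetInputName(0, alloc);   // -> "0"
    //   char* out0 = session_.GetOutputName(0, alloc);  // -> "512"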

    // 3. Load the image to run inference on
    Mat img = imread(image_file);

    // 4. Prepare an input tensor for the loaded image; for other models only the 512, 512 sizes need changing
    Ort::Value input_tensor_{nullptr};
    std::array<float, 1 * 3 * 512 * 512> input_image_{};
    std::array<int64_t, 4> input_shape_{1, 3, 512, 512};

    auto allocator_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    input_tensor_ = Ort::Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
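    // Note: CreateTensor wraps the existing input_image_ buffer (no copy is made),
    // so writing through input_float below is enough for the tensor to see the data.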
    
    float *input_float = input_image_.data();
    fill(input_image_.begin(), input_image_.end(), 0.f);

    // 5. Preprocessing: resize the input image to the size the network expects and normalize it
    preProcess(img, input_float);

    // 6. Run inference
    std::vector<Ort::Value> ort_outputs = session_.Run(Ort::RunOptions{nullptr}, input_names.data(),
                                                       &input_tensor_, 1, output_names, 4);
 
    // 7. Post-processing: extract object coordinates, width/height, angle and offsets; ort_outputs[0:4] -> hm, wh, ang, reg
    vector<Detection> result;
    postProcess(ort_outputs[0].GetTensorMutableData<float>(), ort_outputs[1].GetTensorMutableData<float>(),
                 ort_outputs[2].GetTensorMutableData<float>(), ort_outputs[3].GetTensorMutableData<float>(),
                 result, 128, 128, 1, 3, 0.3);

    // 8. Correct the coordinates: clamp negative values to 0 and values beyond the image size to the image border
    resultCorrect(result, img);

    // 9. Draw the results
    draw(result, img);
    imwrite("result.jpg", img);
    return 0;
}

Reference: ZeroE04/CenterNet_onnxruntime

7、Semantic segmentation model

Writing code with ONNX Runtime is quite concise.
It breaks down into roughly three parts:

  • 1. Initialize the environment, session, etc.
  • 2. Load the model in the session and obtain the model's input and output nodes
  • 3. Call the API to get the model's outputs
    A minimal skeleton of these three steps is sketched right below, followed by the full example using the semantic segmentation model U2Net.
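A bare-bones sketch of the three steps (placeholder model path and node names, not the actual U2Net graph):

Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "demo");         // 1. environment (one per process)
Ort::SessionOptions opts;
Ort::Session session(env, L"model.onnx", opts);           // 2. load the model into a session
// query the input/output node names with session.GetInputName()/GetOutputName(),
// build the input tensor, then:
// auto outputs = session.Run(Ort::RunOptions{ nullptr },  // 3. inference
//                            input_names.data(), &input_tensor, 1,
//                            output_names.data(), 1);

The full U2Net example: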
#include <assert.h>
#include <vector>
#include<ctime>
#include <onnxruntime_cxx_api.h>
int main(int argc, char* argv[]) 
{
    // record the program's running time
    auto start_time = clock();
    // Initialize the environment; one environment per process
    // The environment holds the thread pools and other state information
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
    // Initialize session options
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(1);
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals)
    // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible opitmizations
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

    //*************************************************************************
    // Create the session and load the model into memory
    const wchar_t* model_path = L"u2net.onnx";

    printf("Using Onnxruntime C++ API\n");
    Ort::Session session(env, model_path, session_options);

    //*************************************************************************
    // Print the model's input nodes (node names, types, shape, etc.)
    Ort::AllocatorWithDefaultOptions allocator;

    // Number of model input nodes
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    std::vector<const char*> input_node_names(num_input_nodes);
    std::vector<const char*> output_node_names(num_output_nodes);
    std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224}.
                                           // Otherwise need vector<vector<>>

    printf("Number of inputs = %zu\n", num_input_nodes);
    // Iterate over all input nodes
    for (int i = 0; i < num_input_nodes; i++) {
         // Print the input node's name
        char* input_name = session.GetInputName(i, allocator);
        printf("Input %d : name=%s\n", i, input_name);
        input_node_names[i] = input_name;

        // Print the input node's type
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

        ONNXTensorElementDataType type = tensor_info.GetElementType();
        printf("Input %d : type=%d\n", i, type);

        input_node_dims = tensor_info.GetShape();
        // Print the input node's number of dimensions
          printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
        // Print the size of each dimension
          for (int j = 0; j < input_node_dims.size(); j++)
             printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]);
        //batch_size=1
        input_node_dims[0] = 1;
    }
    // Print the output node info in the same way
    for (int i = 0; i < num_output_nodes; i++)
    {
        char* output_name = session.GetOutputName(i, allocator);
        printf("Output: %d name=%s\n", i, output_name);
        output_node_names[i] = output_name;
        Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        printf("Output %d : type=%d\n", i, type);
        auto output_node_dims = tensor_info.GetShape();
        printf("Output %d : num_dims=%zu\n", i, output_node_dims.size());
        for (int j = 0; j < output_node_dims.size(); j++)
            printf("Output %d : dim %d=%jd\n", i, j, output_node_dims[j]);
    }
    
    //*************************************************************************
    // Score the model with sample data and sanity-check the input/output values
    size_t input_tensor_size = 3 * 320 * 320;  // simplify ... using known dim values to calculate size
                                               // use OrtGetTensorShapeElementCount() to get official size!
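    // Sketch (not in the original): the element count can also be read from the
    // session instead of being hard-coded (assuming all input dims are static):
    //   size_t n = session.GetInputTypeInfo(0)
    //                     .GetTensorTypeAndShapeInfo().GetElementCount();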
    
    std::vector<float> input_tensor_values(input_tensor_size);

    // Initialize dummy data (for demonstration; in practice, pass in normalized image data here)
    for (unsigned int i = 0; i < input_tensor_size; i++)
        input_tensor_values[i] = (float)i / (input_tensor_size + 1);
    
    // Create a tensor object for the input data
    try
    {
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
        //assert(input_tensor.IsTensor());
    
	    // Run inference to get the results
	    auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
	    assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
	
	    // Get pointer to output tensor float values
	    float* floatarr = output_tensors.front().GetTensorMutableData<float>(); 
	    printf("Number of outputs = %zu\n", output_tensors.size());
    }
    catch (Ort::Exception& e)
    {
        printf("%s\n", e.what());
    }
    auto end_time = clock();
    printf("Process exits after %.2f seconds\n", static_cast<float>(end_time - start_time) / CLOCKS_PER_SEC);
    printf("Done!\n");
    return 0;
}

Output:
(screenshot of the console output omitted)
Next, we wrap this up a bit further into a class to make it easier to reuse.

class U2NetModel
{
public:
    U2NetModel(const wchar_t* onnx_model_path);
    float* predict(std::vector<float>input_data,int batch_size=1);
private:
    Ort::Env env;
    Ort::Session session;
    Ort::AllocatorWithDefaultOptions allocator;
    std::vector<const char*>input_node_names;
    std::vector<const char*>output_node_names;
    std::vector<int64_t> input_node_dims;
};
U2NetModel::U2NetModel(const wchar_t* onnx_model_path):session(nullptr),env(nullptr)
{
    // Initialize the environment; one environment per process, holding the thread pools and other state
    this->env=Ort::Env(ORT_LOGGING_LEVEL_WARNING, "u2net");
    // Initialize session options
    Ort::SessionOptions session_options;
    session_options.SetInterOpNumThreads(1);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    // Create the session and load the model into memory
    this->session=Ort::Session(env, onnx_model_path,session_options);
    // Number and names of input/output nodes
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    for (int i = 0; i < num_input_nodes; i++)
    {
        auto input_node_name = session.GetInputName(i, allocator);
        this->input_node_names.push_back(input_node_name);
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        this->input_node_dims = tensor_info.GetShape();
    }
    for (int i = 0; i < num_output_nodes; i++)
    {
        auto output_node_name = session.GetOutputName(i, allocator);
        this->output_node_names.push_back(output_node_name);
    }
}
float* U2NetModel::predict(std::vector<float>input_tensor_values,int batch_size)
{
    this->input_node_dims[0] = batch_size;
    auto input_tensor_size = input_tensor_values.size();
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
    auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
    assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
    float* floatarr = output_tensors.front().GetTensorMutableData<float>();
    return floatarr;
}

Then create an instance and call the interface:

int main(int argc, char* argv[]) 
{
    auto start_time = std::clock();
    U2NetModel u2net(L"u2net.onnx");
    size_t input_tensor_size = 3 * 320 * 320;
    std::vector<float> input_tensor_values(input_tensor_size);

    // Initialize dummy data (for demonstration)
    for (unsigned int i = 0; i < input_tensor_size; i++)
    {
        input_tensor_values[i] = (float)i / (input_tensor_size + 1);
    }
    float* results = nullptr;
    try
    {
        results = u2net.predict(input_tensor_values);
    }
    catch (Ort::Exception& e)
    {
        // results is still nullptr here (and is never owned by the caller), so there is nothing to free
        printf("%s\n", e.what());
    }
    auto end_time = std::clock();
    printf("Process exits after %.2f seconds\n", static_cast<float>(end_time - start_time) / CLOCKS_PER_SEC);
    printf("Done!\n");
    return 0;
}

The model part is now done, but the problem is that we still have no way to see how the model actually performs.
So we also need to read in an image and display it, that is:
1. Read an image
2. Run model inference
3. Display the image on screen

Model inference with the help of OpenCV

First, overload a new predict function to support cv::Mat data.
Note that this new version no longer returns a raw float*, but a std::vector<float>: the raw pointer from GetTensorMutableData points into memory owned by the output Ort::Value, so it would dangle once the outputs returned by Run are destroyed.

class U2NetModel
{
public:
    ...
    std::vector<float> predict(std::vector<float>& input_data,int batch_size=1,int index=0);
    cv::Mat predict(cv::Mat& input_tensor, int batch_size = 1, int index = 0);
    ...
}

Implementation, with added handling for cv::Mat:

std::vector<float> U2NetModel::predict(std::vector<float>& input_tensor_values,int batch_size,int index)
{
    this->input_node_dims[0] = batch_size;
    this->output_node_dims[0] = batch_size;
    float* floatarr = nullptr;
    try
    {
        std::vector<const char*>output_node_names;
        if (index != -1)
        {
            output_node_names = { this->output_node_names[index] };
        }
        else
        {
            output_node_names = this->output_node_names;
        }
        this->input_node_dims[0] = batch_size;
        auto input_tensor_size = input_tensor_values.size();
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
        auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
        assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
        floatarr = output_tensors.front().GetTensorMutableData<float>();
    }
    catch (Ort::Exception&e)
    {
        throw e;
    }
    int64_t output_tensor_size = 1;
    for (auto& it : this->output_node_dims)
    {
        output_tensor_size *= it;
    }
    std::vector<float>results(output_tensor_size);
    for (unsigned i = 0;i < output_tensor_size; i++)
    {
        results[i] = floatarr[i];
    }
    return results;
}
cv::Mat U2NetModel::predict(cv::Mat& input_tensor, int batch_size, int index)
{
    int input_tensor_size = input_tensor.cols * input_tensor.rows * 3;
    std::size_t counter = 0; // allocate the std::vector once up front to avoid extra data copies
    std::vector<float>input_data(input_tensor_size);
    std::vector<float>output_data;
    try
    {
        for (unsigned k = 0; k < 3; k++)
        {
            for (unsigned i = 0; i < input_tensor.rows; i++)
            {
                for (unsigned j = 0; j < input_tensor.cols; j++)
                {
                    input_data[counter++]=static_cast<float>(input_tensor.at<cv::Vec3b>(i, j)[k]) / 255.0;
                }
            }
        }
    }
    catch (cv::Exception& e)
    {
        printf("%s\n", e.what());
    }
    try
    {
        output_data = this->predict(input_data);
    }
    catch (Ort::Exception& e)
    {
        throw e;
    }
    cv::Mat output_tensor(output_data);
    output_tensor=output_tensor.reshape(1, { 320,320 })*255.0;
    std::cout << output_tensor.rows << " " << output_tensor.cols << std::endl;  // debug print of the output size
    return output_tensor;
}

int main(int argc, char* argv[]) 
{
    U2NetModel model(L"u2net.onnx");
    cv::Mat image = cv::imread("horse.jpg");
    cv::resize(image, image, { 320, 320 },0.0,0.0, cv::INTER_CUBIC);// resize to 320*320
    cv::imshow("image", image);                                     // show the original image
    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);                  // convert BGR to RGB
    auto result=model.predict(image);                               // model prediction
    cv::imshow("result", result);                                   // show the result
    cv::waitKey(0);
}

(result image omitted)
At this point, deploying the model and displaying its result are working.
However, directly turning the raw model output into an image clearly does not give an ideal result,
so the data still needs post-processing: binarize the image
to obtain a mask matrix.

    cv::Mat output_tensor(output_data);
    output_tensor=255.0-output_tensor.reshape(1, { 320,320 })*255.0;
    cv::threshold(output_tensor, output_tensor, 220, 255, cv::THRESH_BINARY_INV);
    return output_tensor;

(result image omitted)
Now the result is quite satisfactory.

Complete test code

#include <assert.h>
#include <vector>
#include <ctime>
#include <iostream>
#include <onnxruntime_cxx_api.h>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
class U2NetModel
{
public:
    U2NetModel(const wchar_t* onnx_model_path);
    std::vector<float> predict(std::vector<float>& input_data,int batch_size=1,int index=0);
    cv::Mat predict(cv::Mat& input_tensor, int batch_size = 1, int index = 0);
private:
    Ort::Env env;
    Ort::Session session;
    Ort::AllocatorWithDefaultOptions allocator;
    std::vector<const char*>input_node_names;
    std::vector<const char*>output_node_names;
    std::vector<int64_t> input_node_dims;
    std::vector<int64_t> output_node_dims;
};
U2NetModel::U2NetModel(const wchar_t* onnx_model_path):session(nullptr),env(nullptr)
{
    // Initialize the environment; one environment per process, holding the thread pools and other state
    this->env=Ort::Env(ORT_LOGGING_LEVEL_WARNING, "u2net");
    // Initialize session options
    Ort::SessionOptions session_options;
    session_options.SetInterOpNumThreads(4);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    // Create the session and load the model into memory
    this->session=Ort::Session(env, onnx_model_path,session_options);
    // Number and names of input/output nodes
    size_t num_input_nodes = session.GetInputCount();
    size_t num_output_nodes = session.GetOutputCount();
    for (int i = 0; i < num_input_nodes; i++)
    {
        auto input_node_name = session.GetInputName(i, allocator);
        this->input_node_names.push_back(input_node_name);
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        ONNXTensorElementDataType type = tensor_info.GetElementType();
        this->input_node_dims = tensor_info.GetShape();
    }
    for (int i = 0; i < num_output_nodes; i++)
    {
        auto output_node_name = session.GetOutputName(i, allocator);
        this->output_node_names.push_back(output_node_name);
        Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
        this->output_node_dims = tensor_info.GetShape();
    }
}
std::vector<float> U2NetModel::predict(std::vector<float>& input_tensor_values,int batch_size,int index)
{
    this->input_node_dims[0] = batch_size;
    this->output_node_dims[0] = batch_size;
    float* floatarr = nullptr;
    try
    {
        std::vector<const char*>output_node_names;
        if (index != -1)
        {
            output_node_names = { this->output_node_names[index] };
        }
        else
        {
            output_node_names = this->output_node_names;
        }
        this->input_node_dims[0] = batch_size;
        auto input_tensor_size = input_tensor_values.size();
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
        auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
        assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
        floatarr = output_tensors.front().GetTensorMutableData<float>();
    }
    catch (Ort::Exception&e)
    {
        throw e;
    }
    int64_t output_tensor_size = 1;
    for (auto& it : this->output_node_dims)
    {
        output_tensor_size *= it;
    }
    std::vector<float>results(output_tensor_size);
    for (unsigned i = 0;i < output_tensor_size; i++)
    {
        results[i] = floatarr[i];
    }
    return results;
}
cv::Mat U2NetModel::predict(cv::Mat& input_tensor, int batch_size, int index)
{
    int input_tensor_size = input_tensor.cols * input_tensor.rows * 3;
    std::size_t counter = 0; // allocate the std::vector once up front to avoid extra data copies
    std::vector<float>input_data(input_tensor_size);
    std::vector<float>output_data;
    try
    {
        for (unsigned k = 0; k < 3; k++)
        {
            for (unsigned i = 0; i < input_tensor.rows; i++)
            {
                for (unsigned j = 0; j < input_tensor.cols; j++)
                {
                    input_data[counter++]=static_cast<float>(input_tensor.at<cv::Vec3b>(i, j)[k]) / 255.0;
                }
            }
        }
    }
    catch (cv::Exception& e)
    {
        printf("%s\n", e.what());
    }
    try
    {
        output_data = this->predict(input_data);
    }
    catch (Ort::Exception& e)
    {
        throw e;
    }
    cv::Mat output_tensor(output_data);
    output_tensor=255.0-output_tensor.reshape(1, { 320,320 })*255.0;
    cv::threshold(output_tensor, output_tensor, 220, 255, cv::THRESH_BINARY_INV);
    
    return output_tensor;
}
int main(int argc, char* argv[]) 
{
    U2NetModel model(L"u2net.onnx");
    cv::Mat image = cv::imread("horse.jpg");
    cv::resize(image, image, { 320, 320 },0.0,0.0, cv::INTER_CUBIC);// resize to 320*320
    cv::imshow("image", image);                                     // show the original image
    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);                  // convert BGR to RGB
    auto result=model.predict(image);                               // model prediction
    cv::imshow("result", result);                                   // show the result
    cv::waitKey(0);
}

Reference: Deploying a neural-network semantic segmentation model in C++ (VS2019 + ONNXRuntime + OpenCV)
