tf模型在C++部署

wp133716

已于 2023-11-23 19:52:55 修改

阅读量716

点赞数 2

文章标签： c++ 深度学习 人工智能

于 2023-03-23 16:51:51 首次发布

本文链接：https://blog.csdn.net/wp133716/article/details/129732675

版权

tensorflow训练好的模型使用ONNX Runtime在C++部署

TensorFlow 模型通过 tf2onnx 转换为 ONNX 格式；前面的文章已经介绍过如何在 C++ 中调用 tf.keras 模型。

环境

ubuntu20.04
cuda 11.6
cudnn 8.2.4

参考链接

ONNX Runtime

ONNX (Open Neural Network Exchange) 是微软和脸书主导的深度学习开发工具生态系统，ONNX Runtime (简称 ORT) 则是微软开发的跨平台高性能机器学习训练与推理加速器，根据官方的说法推理/训练速度最高能有 17X/1.4X 的提升，其优异的性能非常适合深度学习模型部署。

克隆

# Clone the ONNX Runtime repository together with its git submodules.
git clone --recursive https://github.com/Microsoft/onnxruntime
cd onnxruntime/
# Switch the working tree to the v1.13.0 tag.
git checkout v1.13.0

ONNXRuntime版本和cuda、cudnn版本要对应，具体参考官方链接:https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html。
这里选择了1.13.0版本

ONNX Runtime	CUDA	cuDNN
1.14 /1.13.1 / 1.13	11.6	8.2.4 (Linux) / 8.5.0.96 (Windows)

编译

# Release build of the shared library with CUDA support; tests are skipped and the build runs in parallel.
# --cuda_home and --cudnn_home point at the CUDA and cuDNN install locations (both /usr/local/cuda-11.6 here).
./build.sh --skip_tests --use_cuda --config Release --build_shared_lib --parallel --cuda_home /usr/local/cuda-11.6 --cudnn_home /usr/local/cuda-11.6

--use_cuda 表示启用 CUDA 支持进行编译（build with CUDA support），--cuda_home 和 --cudnn_home 分别指向 cuda 和 cudnn 的安装路径

注意
编译过程中会从 GitHub 拉取其它依赖仓库(大概几十个)，可能因为网络问题导致编译失败，需要科学上网或者手动添加镜像源

# Open the clone's git configuration file for editing.
# "${your git repo root}" is a placeholder for the path of the cloned repository.
cd ${your git repo root}
cd .git
vim config

修改为：

[core]
    repositoryformatversion = 0
    filemode = true
    bare = false
    logallrefupdates = true
    ignorecase = true
    precomposeunicode = true
# The "origin" remote is pointed at a mirror host instead of github.com.
[remote "origin"]
    url = https://github.com.cnpmjs.org/microsoft/onnxruntime.git
    # Refspec that fetches only the v1.13.0 tag.
    fetch = +refs/tags/v1.13.0:refs/tags/v1.13.0

编译完成，安装

# build.sh was invoked with "--config Release", so the build tree is
# build/Linux/Release (capital "R"); the path is case-sensitive on Linux.
cd ./build/Linux/Release
# DESTDIR is a staging directory that is prepended to the install prefix.
# Replace the placeholder with the directory you want to install into.
make install DESTDIR=想要安装的路径

配置环境变量

# onnxruntime
# Root of the installed ONNX Runtime tree (adjust to your own install location).
export ONNX_HOME=/home/user/3rd-party/onnx/usr/local
# Executable search path.
export PATH=$PATH:$ONNX_HOME/bin
# Shared-library search path used by the dynamic loader at run time.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ONNX_HOME/lib
# Library search path used by GCC at link time.
export LIBRARY_PATH=$LIBRARY_PATH:$ONNX_HOME/lib
# Header search paths used by GCC when compiling C and C++ sources.
export C_INCLUDE_PATH=$C_INCLUDE_PATH:$ONNX_HOME/include
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$ONNX_HOME/include

测试代码

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

#include <onnxruntime/core/session/onnxruntime_cxx_api.h>

using namespace std;


int main()
{
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
    Ort::SessionOptions session_options;

    OrtCUDAProviderOptions cuda_options{
          0,
          OrtCudnnConvAlgoSearch::EXHAUSTIVE,
          std::numeric_limits<size_t>::max(),
          0,
          true
      };

    session_options.AppendExecutionProvider_CUDA(cuda_options);
    const char* model_path = "../model_test.onnx";

    Ort::Session session(env, model_path, session_options);
    // print model input layer (node names, types, shape etc.)
    Ort::AllocatorWithDefaultOptions allocator;

    // print number of model input nodes
    size_t num_input_nodes = session.GetInputCount();

    std::vector<const char*> input_node_names = {"input_0", "input_1"};
    std::vector<const char*> output_node_names = {"dense_2", "tf.math.multiply_2"};

    std::vector<int64_t> input_node_dims = {1, 50, 9};
    std::vector<int64_t> input_node_dims2 = {1, 50, 2};
    
    // 设置输入
    size_t input_tensor_size = 50 * 9;
    size_t input_tensor_size2 = 50 * 2;
    std::vector<float> input_tensor_values(input_tensor_size);
    std::vector<float> input_tensor_values2(input_tensor_size2);

    //测试100次所需的推理时间
    auto start = std::chrono::system_clock::now();
    for(int i=0; i<10; i++)
    {   
        // 测试每次推理所需时间
        auto start2 = std::chrono::system_clock::now();
        for (unsigned int i = 0; i < input_tensor_size; i++)
            input_tensor_values[i] = 1.f;
        for (unsigned int i = 0; i < input_tensor_size2; i++)
            input_tensor_values2[i] = 1.f;
        // create input tensor object from data values 
        auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        auto memory_info2 = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(),
                                                                input_tensor_size, input_node_dims.data(), 3);
        Ort::Value input_tensor2 = Ort::Value::CreateTensor<float>(memory_info2, input_tensor_values2.data(),
                                                                input_tensor_size2, input_node_dims2.data(), 3);
        std::vector<Ort::Value> ort_inputs;
        ort_inputs.push_back(std::move(input_tensor));
        ort_inputs.push_back(std::move(input_tensor2));
        auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), ort_inputs.data(),
                                        ort_inputs.size(), output_node_names.data(), 1);
        float* floatarr = output_tensors[0].GetTensorMutableData<float>();
        for (int i=0; i<4; i++)
        {
            std::cout<<floatarr[i]<<std::endl;
        }

        auto end2 = std::chrono::system_clock::now();
        std::chrono::duration<double> elapsed_seconds2 = end2-start2;
        std::cout << "elapsed time: " << elapsed_seconds2.count() << "s\n";
    }
    auto end = std::chrono::system_clock::now();
    std::chrono::duration<double> elapsed_seconds = end-start;
    std::cout << "elapsed time: " << elapsed_seconds.count() << "s\n";

    return 0;
}

CMakeLists

cmake_minimum_required(VERSION 3.13)

# A top-level CMakeLists.txt should declare its project and languages.
project(onnx_test LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Debug stays the default, but -DCMAKE_BUILD_TYPE=... on the command line is honoured.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug)
endif()

# Root of the ONNX Runtime install (the directory that contains include/ and lib/).
# Taken from the ONNX_HOME environment variable when it is set; otherwise the
# previous hard-coded location is used. Override with -DONNXRUNTIME_ROOT=<path>.
if(DEFINED ENV{ONNX_HOME})
    set(ONNXRUNTIME_ROOT "$ENV{ONNX_HOME}" CACHE PATH "ONNX Runtime install root")
else()
    set(ONNXRUNTIME_ROOT "/home/onnx" CACHE PATH "ONNX Runtime install root")
endif()

add_executable(onnx onnx_test.cpp)

# Scope the header and library search paths to this target only.
target_include_directories(onnx PRIVATE "${ONNXRUNTIME_ROOT}/include")
target_link_directories(onnx PRIVATE "${ONNXRUNTIME_ROOT}/lib")
target_link_libraries(onnx PRIVATE onnxruntime onnxruntime_providers_shared)