Foreword
This post walks through exporting a scikit-learn model to ONNX and deploying it with C++.
1. Model Preparation
Train a model with sklearn for whatever machine-learning task you have. For convenience, this post demonstrates with one of sklearn's built-in datasets and trains a GBDT.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import GradientBoostingClassifier

if __name__ == '__main__':
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    gbdt = GradientBoostingClassifier()
    gbdt.fit(X_train, y_train)
    gbdt_val = gbdt.predict(X_test)
    print(mean_absolute_error(y_test, gbdt_val))
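As a side note, since GradientBoostingClassifier predicts class labels, a classification metric such as accuracy can read more naturally than MAE; a one-line alternative:

from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, gbdt_val))  # fraction of test samples classified correctly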
2. Model Export
Exporting the trained sklearn model to ONNX takes only the skl2onnx package:
pip install skl2onnx
Example code:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

initial_type = [('float_input', FloatTensorType([1, 4]))]
onx = convert_sklearn(gbdt, initial_types=initial_type)
with open("gbdt_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())
The shape passed to FloatTensorType([1, 4]) has to match your own data: each iris sample has 4 features, so the input shape here is set to (1, 4).
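A fixed (1, 4) shape means the exported model accepts exactly one sample per call. skl2onnx also accepts None for the batch dimension, which leaves it dynamic; a sketch of the same export with a dynamic batch size:

# None leaves the batch dimension dynamic, so any number of rows can be scored at once.
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(gbdt, initial_types=initial_type)
with open("gbdt_iris_dynamic.onnx", "wb") as f:
    f.write(onx.SerializeToString())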
3. ONNX Model Inference (Python)
The code is as follows:
import numpy as np
import onnxruntime as rt

if __name__ == '__main__':
    onnx_model = r"F:\working\project\model_file\gbdt_iris.onnx"
    sess = rt.InferenceSession(
        onnx_model, providers=rt.get_available_providers())
    inputs = np.array([5.1, 3.5, 1.4, 0.2], dtype=np.float32).reshape(1, 4)
    input_name = sess.get_inputs()[0].name
    pred_onx = sess.run(None, {input_name: inputs})[0]
    print(pred_onx)
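Note that sess.run(None, ...) returns every model output, and a classifier exported with skl2onnx's default settings typically has two: the predicted label and the per-class probabilities (the latter wrapped by ZipMap into a list of dicts). A small sketch, assuming the default export above:

outputs = sess.run(None, {input_name: inputs})
labels = outputs[0]         # e.g. array([0], dtype=int64)
probabilities = outputs[1]  # e.g. [{0: 0.998, 1: 0.001, 2: 0.001}]
print(labels, probabilities)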
4. ONNX Model Inference (C++)
First configure the C++ build of onnxruntime in Visual Studio. Plenty of download and setup guides exist online, so that is not repeated here.
The model wrapper is split into a header and an implementation file. detector.h:
#pragma once
#include <onnxruntime_cxx_api.h>
#include <string>
#include <vector>
#include <iostream>
#include <algorithm>

class Detector
{
public:
    Detector(const std::string& modelPath, const bool& isGPU);
    int detect(std::vector<float> inputs);

private:
    Ort::Env env{ nullptr };
    Ort::SessionOptions session_options{ nullptr };
    Ort::Session session{ nullptr };
    std::vector<std::string> inputNames;
    std::vector<std::string> outputNames;
};
#include "detector.h"
#include "utils.h"
Detector::Detector(const std::string& modelPath, const bool& isGPU)
{
env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "ONNX_DETECTION");
session_options = Ort::SessionOptions();
std::vector<std::string> availableProviders = Ort::GetAvailableProviders();
auto cudaAvailable = std::find(availableProviders.begin(), availableProviders.end(), "CUDAExecutionProvider");
OrtCUDAProviderOptions cudaOption;
if (isGPU && (cudaAvailable == availableProviders.end()))
{
std::cout << "GPU is not supported by your ONNXRuntime build. Fallback to CPU." << std::endl;
std::cout << "Inference device: CPU" << std::endl;
}
else if (isGPU && (cudaAvailable != availableProviders.end()))
{
std::cout << "Inference device: GPU" << std::endl;
session_options.AppendExecutionProvider_CUDA(cudaOption);
}
else
{
std::cout << "Inference device: CPU" << std::endl;
}
std::wstring w_modelPath = utils::charToWstring(modelPath.c_str());
session = Ort::Session(env, w_modelPath.c_str(), session_options);
Ort::AllocatorWithDefaultOptions allocator;
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
std::vector<int64_t> inputTensorShape = inputTypeInfo.GetTensorTypeAndShapeInfo().GetShape();
std::cout << std::endl;
inputNames.push_back(session.GetInputNameAllocated(0, allocator).get());
outputNames.push_back(session.GetOutputNameAllocated(0, allocator).get());
//std::cout << "Input name: " << inputNames[0] << std::endl;
//std::cout << "Output name: " << outputNames[0] << std::endl;
}
int Detector::detect(std::vector<float> inputs)
{
    // The model was exported with input shape (1, 4): one sample, 4 features.
    std::vector<int64_t> inputTensorShape{ 1, 4 };
    size_t inputTensorSize = utils::vectorProduct(inputTensorShape);

    std::vector<Ort::Value> inputTensors;
    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault
    );
    // CreateTensor does not copy: it wraps the buffer owned by `inputs`.
    // .data() returns a pointer to the first element of the vector's internal array.
    inputTensors.push_back(Ort::Value::CreateTensor<float>(
        memoryInfo, inputs.data(), inputTensorSize,
        inputTensorShape.data(), inputTensorShape.size()
    ));

    // Run() takes const char* names, so borrow pointers from the cached std::strings.
    std::vector<const char*> input_names_char(inputNames.size(), nullptr);
    std::transform(std::begin(inputNames), std::end(inputNames), std::begin(input_names_char),
        [&](const std::string& str) { return str.c_str(); });
    std::vector<const char*> output_names_char(outputNames.size(), nullptr);
    std::transform(std::begin(outputNames), std::end(outputNames), std::begin(output_names_char),
        [&](const std::string& str) { return str.c_str(); });

    std::vector<Ort::Value> outputTensors = this->session.Run(Ort::RunOptions{ nullptr },
        input_names_char.data(),
        inputTensors.data(),
        1,
        output_names_char.data(),
        1);

    // skl2onnx exports integer class labels as an int64 tensor, so read it as int64_t.
    auto* rawOutput = outputTensors[0].GetTensorData<int64_t>();
    int final_res = static_cast<int>(*rawOutput);
    return final_res;
}
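The code above calls two small helpers from utils.h that are not shown. A minimal sketch of what they might look like, inferred from how they are used (the versions in the full project may differ):

#pragma once
#include <codecvt>   // note: deprecated since C++17, but still shipped by MSVC
#include <cstdint>
#include <functional>
#include <locale>
#include <numeric>
#include <string>
#include <vector>

namespace utils
{
    // Convert a narrow (UTF-8) string to std::wstring; the Windows build of
    // Ort::Session takes a wide-character model path.
    inline std::wstring charToWstring(const char* str)
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        return converter.from_bytes(str);
    }

    // Product of all dimensions of a tensor shape, i.e. its element count.
    inline size_t vectorProduct(const std::vector<int64_t>& vec)
    {
        return static_cast<size_t>(std::accumulate(vec.begin(), vec.end(),
            static_cast<int64_t>(1), std::multiplies<int64_t>()));
    }
}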
Only part of the code is shown above; the full code is on my GitHub.
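To tie everything together, a minimal usage sketch (the model path and GPU flag are placeholders to adapt):

#include "detector.h"
#include <iostream>
#include <vector>

int main()
{
    // Path and GPU flag are placeholders; point them at your exported model.
    Detector detector("gbdt_iris.onnx", /*isGPU=*/false);
    std::vector<float> sample{ 5.1f, 3.5f, 1.4f, 0.2f };  // one iris sample, 4 features
    int label = detector.detect(sample);
    std::cout << "Predicted class: " << label << std::endl;
    return 0;
}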
Summary
This post covered how to export a sklearn model to ONNX and then run inference with the exported model, both in Python and as a C++ deployment.