概述
TVM是一个开源的机器学习编译器堆栈,用于让机器学习模型能够在各种硬件上高效地运行。它最初由华盛顿大学的SAMPL实验室开发,目前已被亚马逊、Facebook、微软、阿里巴巴、华为等多家公司采用。TVM主要有以下几个特点:
- 端到端的优化:TVM可以从深度学习框架中获取模型,然后进行一系列的优化,包括算子融合、计算图优化、内存优化等,最终生成高效的代码。
- 支持多种硬件:TVM支持CPU、GPU、FPGA、ASIC等多种硬件,覆盖NVIDIA、AMD、ARM、Mali等多种设备。
- 自动调参:TVM内置了自动调参工具AutoTVM,可以自动搜索最优的算子实现,大大提高了模型的运行效率。
- 支持多种前端框架:TVM支持多种深度学习框架,包括TensorFlow、PyTorch、MXNet、ONNX等,可以方便地将这些框架中的模型导入TVM进行优化。
- 灵活的编程模型:TVM提供了灵活的编程模型,用户可以使用张量表达式语言TE和底层张量中间表示TIR来自定义算子,并使用计算图级中间表示Relay来描述和变换计算图,实现更复杂的优化。
总的来说,TVM是一个功能强大的深度学习编译器,可以帮助用户在各种硬件上高效地运行深度学习模型。
模型部署流程
模型转换
import onnx
import tvm
import tvm.relay as relay
import os
def prepare_graph_lib(base_path):
    """Compile the ResNet-50 ONNX model with TVM and export it as a shared library.

    Loads ``./models/resnet50-v2-7.onnx`` (relative to the current working
    directory), imports it into Relay with a single input named ``"data"`` of
    shape ``(1, 3, 224, 224)``, builds it for the ``llvm`` target at
    ``opt_level=3`` and writes the result to ``<base_path>/resnet50.so``.

    Args:
        base_path: Directory that receives ``resnet50.so``. It is created
            (including parents) when it does not exist yet.
    """
    onnx_model = onnx.load('./models/resnet50-v2-7.onnx')

    input_name = "data"
    shape_dict = {input_name: (1, 3, 224, 224)}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

    target = "llvm"
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)

    # The output directory may be missing (e.g. on a fresh checkout); create
    # it up front so the export does not fail on a non-existent path.
    os.makedirs(base_path, exist_ok=True)
    dylib_path = os.path.join(base_path, "resnet50.so")
    lib.export_library(dylib_path)
if __name__ == "__main__":
    # Export the compiled library into the "lib" directory next to this script.
    script_file = os.path.abspath(os.path.expanduser(__file__))
    output_dir = os.path.join(os.path.dirname(script_file), "lib")
    prepare_graph_lib(output_dir)
模型部署
python部署
import cv2
import tvm
import numpy as np
import tvm.contrib.graph_executor as runtime
def verify(frame):
    """Run one inference on ``frame`` with the exported TVM library.

    Loads ``./lib/resnet50.so``, creates a graph executor on the ``llvm``
    (CPU) device, feeds ``frame`` to the input named ``"data"`` and runs the
    model.

    Args:
        frame: Either a 3-D ``(H, W, C)`` image array (the layout OpenCV
            produces) or an array that already has the model's 4-D input
            layout. 3-D inputs are converted to ``(1, C, H, W)``.

    Returns:
        The first model output as a NumPy array.
    """
    if frame.ndim == 3:
        # The model was compiled for a (1, 3, 224, 224) input, whereas OpenCV
        # images are (H, W, C): move channels first and add the batch axis.
        frame = np.expand_dims(np.transpose(frame, (2, 0, 1)), axis=0)
    # NOTE(review): no mean/std normalisation is applied here; confirm
    # whether the exported model expects normalised input.
    input_data = np.ascontiguousarray(frame, dtype="float32")

    mod_dylib = tvm.runtime.load_module("./lib/resnet50.so")
    target = "llvm"
    dev = tvm.device(str(target), 0)
    tvm_resModel = runtime.GraphModule(mod_dylib["default"](dev))

    tvm_resModel.set_input("data", tvm.nd.array(input_data))
    tvm_resModel.run()
    out_deploy = tvm_resModel.get_output(0).numpy()
    print("End")
    return out_deploy
if __name__ == "__main__":
    # Read and preprocess the image.
    image_path = "./img/kitten.jpg"
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising, so fail here with a clear message.
        raise FileNotFoundError("cannot read image: " + image_path)
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    img = cv2.resize(image_rgb, (224, 224))
    # Do inference.
    verify(img)
C++部署
#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <opencv2/opencv.hpp>
#include <string>
#include <iostream>
class TVMResNet {
public:
explicit TVMResNet(const std::string& lib_path)
{
DLDevice dev{kDLCPU, 0};
mod_factory = tvm::runtime::Module::LoadFromFile(lib_path, "so");
gmod = mod_factory.GetFunction("default")(dev);
set_input = gmod.GetFunction("set_input");
get_output = gmod.GetFunction("get_output");
run = gmod.GetFunction("run");
// Use the C++ API
x = tvm::runtime::NDArray::Empty({1, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev);
y = tvm::runtime::NDArray::Empty({1, 1000}, DLDataType{kDLFloat, 32, 1}, dev);
}
void inference(cv::Mat frame)
{
cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
cv::resize(frame, frame, cv::Size(224, 224));
cv::Mat img_float;
frame.convertTo(img_float, CV_32F);
x.CopyFromBytes(img_float.data, 1 * 3 * 224 * 224 * sizeof(float));
set_input("data", x);
run();
get_output(0, y);
auto result = static_cast<float*>(y->data);
for (int i = 0; i < 3; i++)
std::cout<<result[i]<<std::endl;
}
private:
// models
tvm::runtime::Module mod_factory;
tvm::runtime::Module gmod;
tvm::runtime::PackedFunc set_input;
tvm::runtime::PackedFunc get_output;
tvm::runtime::PackedFunc run;
// datas
tvm::runtime::NDArray x;
tvm::runtime::NDArray y;
};
// Loads the exported ResNet library, reads the sample image and runs one
// inference. Returns 0 on success and 1 when the image cannot be read.
int main()
{
    TVMResNet res_net("/data/code/tvm/tvm_sample/lib/resnet50.so");

    const std::string image_path = "/data/code/tvm/tvm_sample/cpp_deploy/img/kitten.jpg";
    cv::Mat frame = cv::imread(image_path);
    if (frame.empty()) {
        // cv::imread returns an empty matrix when the file is missing or
        // cannot be decoded; report that instead of failing inside inference.
        std::cerr << "Failed to read image: " << image_path << std::endl;
        return 1;
    }

    res_net.inference(frame);
    return 0;
}
cmake_minimum_required(VERSION 3.18)
project(resnet_demo)

# Request C++17 through CMake's own mechanism only. Combining
# CMAKE_CXX_STANDARD with a raw -std= flag in CMAKE_CXX_FLAGS puts two
# conflicting standard flags on the compiler command line.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(EXECUTABLE_OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/bin")
set(LIBRARY_OUTPUT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lib")

# Location of the TVM checkout; override with -DTVM_ROOT=<path> if needed.
set(TVM_ROOT "/data/code/tvm/tvm" CACHE PATH "Root directory of the TVM source tree")
set(DMLC_CORE "${TVM_ROOT}/3rdparty/dmlc-core")
set(TVM_RUNTIME_LIB "${TVM_ROOT}/build/libtvm_runtime.so")

include_directories(${TVM_ROOT}/include)
include_directories(${DMLC_CORE}/include)
include_directories(${TVM_ROOT}/3rdparty/dlpack/include)

# CUDA 11.1 is recommended.
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
# Quote the text: unquoted message() arguments are concatenated without spaces.
message(STATUS "CUDA libs: ${CUDA_LIBRARIES}")

find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

cuda_add_executable(resnet_deploycuda cpp_deploy.cc)
# Plain (keyword-less) signature on purpose: it must match the signature
# cuda_add_executable already used for this target.
target_link_libraries(resnet_deploycuda
    ${CUDA_LIBRARIES}
    ${TVM_RUNTIME_LIB}
    ${OpenCV_LIBRARIES}
    rt
    cuda)