千呼万唤始出来
在PyTorch的1.4版本和torchvision的0.5.0版本中,PyTorch可以直接使用libtorch加载FasterRCNN等模型。之前版本支持仅包含conv等网络层模型的C++加载,这次更新增加对RoIAlign/RoIPool/PSRoIAlign/PSRoIPool/nms等网络层的支持,因此对于检测算法中的FasterRCNN、MaskRCNN等网络也可以使用C++加载。
环境配置
torch==1.4.0
torchvision==0.5.0
cmake>=3.13
torchvision
- libtorch
根据系统环境下载对应版本直接解压即可,我使用的libtorch是cuda10.1版本。
- torchvision
下载源码然后编译,注意编译前需要部分修改CMakeLists.txt。
git clone https://github.com/pytorch/vision.git
cd vision
# A fresh clone has no build directory yet — create it before entering.
mkdir -p build
cd build && rm -rf ./*
cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch -DCMAKE_INSTALL_PREFIX=/where/to/install/torchvision -DCMAKE_BUILD_TYPE=Release -DWITH_CUDA=ON ..
cmake --build . -j 20
# "cmake --install" requires CMake >= 3.15; "make install" is the
# equivalent fallback for older CMake — run one of them, not both.
cmake --install .   # or: make install
其中CMakeLists.txt需要修改两个地方:
1、安装pybind11并设置路径
+set(pybind11_DIR /path/to/pybind11/share/cmake/pybind11)
2、增加安装目标的lib目录(CMake 3.16及以后版本不需要此修改)
install(TARGETS ${PROJECT_NAME}
- EXPORT TorchVisionTargets)
+ EXPORT TorchVisionTargets
+ DESTINATION lib)
编译通过后在torchvision的安装路径下会生成include/lib/share三个文件夹,分别包含头文件、库文件(libtorchvision.so等)和cmake配置文件。
至此环境准备工作结束。
工程文件
1、FasterRCNN模型
将预训练模型转化为JIT模型并且保存,根据需要保存GPU版本或者CPU版本
import torch
import torchvision

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
script_model = torch.jit.script(model)
script_model.save("rcnn_gpu.pt")  # 如需GPU版本,可先将model移至CUDA再导出
2、工程文件
example-app.cpp
#include <torch/script.h> // One-stop header.
#include <iostream>
#include <memory>
#include <iostream>
#include "torch/torch.h"
#include "torchvision/vision.h"
#include "torchvision/ROIAlign.h"
#include "torchvision/ROIPool.h"
#include "torchvision/empty_tensor_op.h"
#include "torchvision/nms.h"
#include <cuda.h>
using namespace std;
// Register torchvision's custom ops (nms / roi_align / roi_pool /
// _new_empty_tensor_op) with the dispatcher so torch::jit::load can
// resolve them while deserializing a scripted FasterRCNN/MaskRCNN model.
// NOTE(review): torch::RegisterOperators is the pre-1.5 registration API;
// later torchvision versions register these ops automatically when the
// library is linked — confirm before upgrading.
static auto registry =
torch::RegisterOperators()
.op("torchvision::nms", &nms)
// roi_align is registered with an explicit schema string: its
// float/int arguments cannot be inferred from the C++ function
// pointer alone, and the schema must match the scripted model's calls.
.op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
&roi_align)
.op("torchvision::roi_pool", &roi_pool)
.op("torchvision::_new_empty_tensor_op", &new_empty_tensor);
// Entry point: loads a TorchScript detection model (e.g. FasterRCNN
// exported from torchvision) and runs one forward pass on a dummy
// CUDA image, printing the predicted labels and scores.
// Usage: example-app <path-to-exported-script-module>
int main(int argc, const char* argv[]) {
  if (argc != 2) {
    // Fix: the original literals had lost their backslashes ("...>n"),
    // printing a literal 'n' instead of a newline.
    std::cerr << "usage: example-app <path-to-exported-script-module>\n";
    return -1;
  }
  int gpu_id = 0;
  // Disable the profiling executor / graph optimization: in libtorch 1.4
  // they can mishandle the dynamic shapes produced by detection models.
  torch::jit::getProfilingMode() = false;
  torch::jit::getExecutorMode() = false;
  torch::jit::setGraphExecutorOptimize(false);
  torch::jit::script::Module module;
  try {
    // Deserialize the ScriptModule from a file using torch::jit::load().
    module = torch::jit::load(argv[1]);
    module.to(torch::Device(torch::kCUDA, gpu_id));
    // Detection models take a List[Tensor] of CHW images (no batch dim).
    at::Tensor tensor_image = torch::ones({3, 224, 224});
    tensor_image = tensor_image.to(torch::Device(torch::kCUDA, gpu_id));
    c10::List<at::Tensor> images({tensor_image});
    std::vector<torch::jit::IValue> inputs;
    inputs.emplace_back(images);
    // forward() returns a tuple; element 1 is the detections — a list
    // holding one Dict[str, Tensor] per input image.
    torch::jit::IValue output = module.forward(inputs);
    auto out_tuple = output.toTuple();
    auto dets = out_tuple->elements().at(1).toGenericList();
    auto det0 = dets.get(0).toGenericDict();
    at::Tensor labels = det0.at("labels").toTensor();
    at::Tensor boxes = det0.at("boxes").toTensor();
    at::Tensor scores = det0.at("scores").toTensor();
    std::cout << labels << std::endl;
    std::cout << scores << std::endl;
  }
  catch (const c10::Error& e) {
    // Surface the actual failure reason instead of a bare message.
    std::cerr << "error loading the model: " << e.what() << '\n';
    return -1;
  }
  std::cout << "ok\n";
  return 0;
}
CMakeLists.txt
# Fix: 3.0 was too low — add_compile_definitions() requires CMake >= 3.12,
# and this article's own prerequisites state cmake >= 3.13.
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(custom_ops)
# Compile with torchvision's CUDA code paths enabled.
add_compile_definitions(WITH_CUDA)
# pybind11 location used when torchvision itself was built.
set(pybind11_DIR /path/to/pybind11/share/cmake/pybind11)
find_package(Torch REQUIRED)
find_package(TorchVision REQUIRED)
add_executable(example-app example-app.cpp)
target_link_libraries(example-app "${TORCH_LIBRARIES}" TorchVision::TorchVision)
# libtorch 1.4 is built against C++14.
set_property(TARGET example-app PROPERTY CXX_STANDARD 14)
或者
# Alternative: link libtorchvision.so directly instead of find_package(TorchVision).
# Fix: 3.0 was too low — add_compile_definitions() requires CMake >= 3.12,
# and this article's own prerequisites state cmake >= 3.13.
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(custom_ops)
# Compile with torchvision's CUDA code paths enabled.
add_compile_definitions(WITH_CUDA)
set(pybind11_DIR /path/to/pybind11/share/cmake/pybind11)
find_package(Torch REQUIRED)
# Point straight at the built shared library.
set(BASE_LIBS /path/to/vision/build/libtorchvision.so)
include_directories(/where/to/install/torchvision/include /path/to/python3.8)
add_executable(example-app example-app.cpp)
target_link_libraries(example-app PUBLIC ${BASE_LIBS} "${TORCH_LIBRARIES}")
# libtorch 1.4 is built against C++14.
set_property(TARGET example-app PROPERTY CXX_STANDARD 14)
3、编译过程
# Configure and build the example against libtorch and the installed torchvision.
mkdir build
rm build/* -rf && cd build
# Both install trees must be on CMAKE_PREFIX_PATH so find_package() locates them.
cmake -DCMAKE_PREFIX_PATH="/path/to/libtorch;/where/to/install/torchvision" ..
cmake --build . --config Release
# Run with the TorchScript model exported in step 1.
./example-app ../../rcnn_gpu.pt
4、运行结果
[ CUDALongType{0} ]
[ CUDAFloatType{0} ]
ok
参考资料
https://pytorch.org/tutorials/advanced/cpp_export.html
https://pytorch.org/blog/pytorch-1-dot-4-released-and-domain-libraries-updated/