源码:GitHub - laughtervv/DepthAwareCNN: Depth-aware CNN for RGB-D Segmentation, ECCV 2018
环境:Win10,python 3.7,torch 1.7.1+cu110
depthconv_cuda文件的debug
//nDimension全部替换为dim()
//size[x]全部替换为size(x)
//THCState_getCurrentStream(state)都替换为c10::cuda::getCurrentCUDAStream()
//THCudaBlas_Sgemv替换为at::cuda::blas::gemv<double>,加入头文件#include <ATen/cuda/CUDABlas.h> 该方法能够编译,但是无法链接
//THCudaBlas_xxxxx方法定义于D:\ProgramTensor\Anaconda3\envs\pytorch\Lib\site-packages\torch\include\THC\THCblas.h
//最终使用cublasSgemv,加入头文件#include <ATen/cuda/CUDABlas.h>
//参考DCNv2 https://github.com/lbin/DCNv2/tree/pytorch_1.6 https://github.com/lbin/DCNv2/
//链接错误:将 extern THCState *state; 改成THCState *state = at::globalContext().lazyInitCUDA();
使用nvcc命令编译depthconv_cuda_kernel.cu
具体指令参考make.sh
使用cl命令编译depthconv.cpp和depthconv_cuda.cpp
注意:编译前需先将depthconv.c和depthconv_cuda.c的后缀修改为.cpp
cl -DMS_WIN64 -I "C:\Program Files (x86)\Windows Kits\10\Include\10.0.17763.0\shared" -I"C:\Program Files (x86)\Windows Kits\10\Include\10.0.17763.0\ucrt" -I"D:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Tools\MSVC\14.16.27023\include" -ID:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include -I"D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include\torch\csrc\api\include" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include\TH" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include\THC" -I"D:\ProgramTensor\NVIDIA\CUDA\v10.0\include" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\include" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\include" -IE:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\src -c E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\src\depthconv.cpp /FoE:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\temp.win-amd64-3.7\Release\src\depthconv.o -Zi -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=depthconv -D_GLIBCXX_USE_CXX11_ABI=0
cl -DMS_WIN64 -I "C:\Program Files (x86)\Windows Kits\10\Include\10.0.17763.0\shared" -I"C:\Program Files (x86)\Windows Kits\10\Include\10.0.17763.0\ucrt" -I"D:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Tools\MSVC\14.16.27023\include" -ID:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include -I"D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include\torch\csrc\api\include" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include\TH" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\include\THC" -I"D:\ProgramTensor\NVIDIA\CUDA\v10.0\include" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\include" -I"D:\ProgramTensor\Anaconda3\envs\pytorch\include" -IE:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\src -c E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\src\depthconv_cuda.cpp /FoE:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\temp.win-amd64-3.7\Release\src\depthconv_cuda.o -Zi -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=depthconv -D_GLIBCXX_USE_CXX11_ABI=0
添加bindings.cpp,并使用cl编译
该cpp文件通过pybind11将CUDA/C++中的函数打包,并导出为可在Python中调用的扩展模块
#pragma once
#include <pybind11/pybind11.h>
#include <THC/THC.h>
#include "depthconv.h"
#include "depthconv_cuda.h"
#include "depthconv_cuda_kernel.h"
// Defines the Python extension module. The module name is taken from the
// TORCH_EXTENSION_NAME macro, which must be supplied on the compiler command
// line (e.g. -DTORCH_EXTENSION_NAME=depthconv); `m` is the module handle that
// the functions below are registered on.
//
// Each m.def(...) exposes one C++ function to Python under the given name.
// NOTE(review): per the accompanying notes, the bound functions take
// THCudaTensor* parameters, and calling them with torch.Tensor arguments
// fails with an unsupported-argument-type error; the signatures likely need
// to be migrated to at::Tensor before these bindings are usable -- confirm.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// Entry points without the _cuda suffix (presumably the CPU implementations
// declared in depthconv.h -- TODO confirm).
m.def("depthconv_forward", &depthconv_forward);
m.def("depthconv_backward", &depthconv_backward);
// CUDA entry points (presumably declared in depthconv_cuda.h -- TODO confirm).
m.def("depthconv_forward_cuda", &depthconv_forward_cuda);
m.def("depthconv_backward_input_cuda", &depthconv_backward_input_cuda);
m.def("depthconv_backward_parameters_cuda", &depthconv_backward_parameters_cuda);
// The im2col/col2im helpers are intentionally left unexported for now.
//m.def("depthconv_im2col", &depthconv_im2col);
//m.def("depthconv_col2im", &depthconv_col2im);
}
使用link命令链接
link -dll E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\temp.win-amd64-3.7\Release\src\depthconv.o E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\temp.win-amd64-3.7\Release\src\depthconv_cuda.o E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\temp.win-amd64-3.7\Release\src\depthconv_cuda_kernel.o E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\temp.win-amd64-3.7\Release\src\bindings.o /libpath:"D:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC\Tools\MSVC\14.16.27023\lib\onecore\x64" /libpath:"D:\ProgramTensor\Anaconda3\libs" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.17763.0\um\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.17763.0\ucrt\x64" -LIBPATH:D:\ProgramTensor\Anaconda3\envs\pytorch\lib\site-packages\torch\lib -LIBPATH:D:\ProgramTensor\NVIDIA\CUDA\v10.0\lib/x64 -LIBPATH:D:\ProgramTensor\Anaconda3\envs\pytorch\libs -LIBPATH:D:\ProgramTensor\Anaconda3\envs\pytorch\PCbuild\amd64 c10.lib c10_cuda.lib torch.lib torch_cpu.lib torch_cuda.lib torch_python.lib cudart.lib cublas.lib python37.lib -OUT:E:\ZTensor\programs\DepthAwareCNN-master\models\ops\depthconv\build\lib.win-amd64-3.7\_ext\depthconv.cp37-win_amd64.pyd
以上步骤为单步编译、链接。
按上述修改程序后,其实可以直接构建setup.py,一次性编译。
具体过程请参见《Win10下CUDA/C++的混合编译——PCT中PointNet2模块》
调用
拷贝depthconv.cp37-win_amd64.pyd至depthconv\_ext目录下,即可调用。
实际调用中存在问题:函数定义中输入张量均为THCudaTensor *,但是传入的都是torch.Tensor,运行提示参数类型不支持。对此有两种解决方案:
1)改变传入参数的类型(需要高手加以指点);
2)修改CUDA/C++源代码,替换THCudaTensor *指针为at::Tensor对象(可参考https://github.com/lbin/DCNv2/blob/pytorch_1.6/src/cuda/dcn_v2_cuda.cu)。
成功配置VS+CUDA后,可以一次性编译,参见
《Win10下CUDA/C++的混合编译——更新》