本人第一次在工作中使用cuda,它有很多很奇怪的行为,所以记录下来。
- 我需要使用另外一个文件中的__device__函数
- 根据官方文档,cuda的库需要编译成静态库才能使用,尝试后失败,各种符号未定义(nvcc报错)
- 根据网上的说法在cmake设定了重定向,但是没用
- 感谢Stackoverflow大佬的示例代码,让我跑成功了
- https://gitlab.kitware.com/cmake/cmake/-/tree/master/Tests/CudaOnly/ResolveDeviceSymbols
1. 目录结构
2. 每个文件的内容
2.1 sub1/CMakeLists.txt
# sub1: builds the library that provides the printHello() __device__ function.
project(CudaResolveSymbolOfMySub1)

# Every file that belongs to the target (header + implementation).
set(SOURCE_LIST
    include/printHello.cuh
    src/printHello.cu
)
# Passed to SegModuleBuild as source_dir (the build-tree include directory).
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
# Passed to SegModuleBuild as public_header_list (headers to install).
set(PUBLIC_HEADER_LIST include/printHello.cuh)
# Passed to SegModuleBuild as build_for_py.
set(BUILD_FOR_PY OFF)

# Argument order: project_name, source_list, source_dir, public_header_list,
# build_for_py, build_for_shared. With both switches OFF, SegModuleBuild takes
# its add_library(... STATIC ...) branch.
SegModuleBuild(
    ${PROJECT_NAME}
    "${SOURCE_LIST}"
    "${SOURCE_DIR}"
    "${PUBLIC_HEADER_LIST}"
    ${BUILD_FOR_PY}
    OFF
)
2.2 sub1的另外两个文件
cuh:
// Public header of sub1: declares the printHello() device function.
#ifndef CVCUDAIMPL_PRINTHELLO_CUH
#define CVCUDAIMPL_PRINTHELLO_CUH

// Declared with C language linkage.
extern "C" {
__device__ void printHello();
}

#endif  // CVCUDAIMPL_PRINTHELLO_CUH
cu:
#include "../include/printHello.cuh"
#include "stdio.h"

// Device-side implementation of printHello(): prints a fixed greeting with
// device printf. The linkage matches the declaration in printHello.cuh.
extern "C" __device__ void printHello()
{
    printf("sub1 print hello \n");
}
2.3 外部cmake和main
cmake:
project(CudaResolveSymbolOfMy CUDA)

# Build/install phase: enable the next line (and disable everything below it)
# to build and install the sub1 library first.
#add_subdirectory(sub1)

# Consume phase: use the installed package. REQUIRED makes configuration stop
# right here with a clear message when the package has not been installed yet,
# instead of failing later on the missing imported target.
find_package(CudaResolveSymbolOfMySub1 REQUIRED)

add_executable(${PROJECT_NAME} main.cu)
set_target_properties(${PROJECT_NAME}
    PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON   # separate compilation of device code
        CUDA_RESOLVE_DEVICE_SYMBOLS ON  # device-link when this target is linked
        POSITION_INDEPENDENT_CODE ON
)
target_link_libraries(
    ${PROJECT_NAME}
    PRIVATE
        CudaResolveSymbolOfMySub1::CudaResolveSymbolOfMySub1
)
main.cu
//
// Created by tacom on 22-9-4.
//
#include <cstdio>

#include "printHello.cuh"

// Kernel entry point: every launched thread calls printHello(), which is
// declared in printHello.cuh.
__global__ void start()
{
    printHello();
}

// Launches start() with 1 block of 2 threads and waits for it to finish.
// Returns 0 on success and 1 when the launch or the kernel execution reported
// a CUDA error (previously such failures were silently ignored).
int main()
{
    start<<<1, 2>>>();

    // The <<<>>> launch itself returns nothing; a failed launch is only
    // observable through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Blocks until the kernel has finished and reports execution errors.
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }
    return 0;
}
2.4 SegModuleBuild函数
# SegModuleBuild(<project_name> <source_list> <source_dir>
#                <public_header_list> <build_for_py> <build_for_shared>)
#
# Creates the target <project_name>, configures it for CUDA separable
# compilation, and installs it together with an exported
# <project_name>Targets.cmake and a generated <project_name>Config.cmake.
#
#   project_name       module / target name
#   source_list        list of headers, sources, wrappers or tests to compile
#   source_dir         absolute include directory used from the build tree
#   public_header_list list of headers to install
#   build_for_py       ON: create the target with pybind11_add_module
#   build_for_shared   ON (and build_for_py OFF): create a SHARED library;
#                      when both switches are OFF a STATIC library is created
#
# Example:
#   SegModuleBuild(${PROJECT_NAME} "${SOURCE_LIST}" "${SOURCE_DIR}"
#                  "${PUBLIC_HEADER_LIST}" OFF OFF)
function(SegModuleBuild
        project_name
        source_list
        source_dir
        public_header_list
        build_for_py
        build_for_shared)
    # Provide CMAKE_INSTALL_INCLUDEDIR / CMAKE_INSTALL_DATADIR and
    # configure_package_config_file() even if the caller did not include
    # these standard modules itself.
    include(GNUInstallDirs)
    include(CMakePackageConfigHelpers)

    if(build_for_py)
        pybind11_add_module(${project_name} ${source_list})
    elseif(build_for_shared)
        add_library(${project_name} SHARED ${source_list})
    else()
        add_library(${project_name} STATIC ${source_list})
        # Only the static variant requests the CUDA C++11 standard.
        target_compile_features(${project_name} PUBLIC cuda_std_11)
    endif()

    # Consumers see the source include dir from the build tree and the
    # installed include dir from the install tree.
    target_include_directories(${project_name}
        INTERFACE
            $<BUILD_INTERFACE:${source_dir}>
            $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
    )

    set_target_properties(${project_name}
        PROPERTIES
            # Quoted on purpose: an unquoted list with two or more headers
            # would be split into extra arguments and break the
            # property/value pairing of this command.
            PUBLIC_HEADER "${public_header_list}"
            # Separate compilation of device code, so __device__ functions
            # defined here can be referenced from other targets.
            CUDA_SEPARABLE_COMPILATION ON
            POSITION_INDEPENDENT_CODE ON
            LINKER_LANGUAGE CXX
    )

    install(
        TARGETS ${project_name}
        EXPORT ${project_name}Targets
        PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
        RUNTIME DESTINATION bin
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib
    )
    # Export the imported target as <project_name>::<project_name>.
    install(
        EXPORT ${project_name}Targets
        FILE ${project_name}Targets.cmake
        DESTINATION ${CMAKE_INSTALL_DATADIR}/${project_name}/cmake
        NAMESPACE ${project_name}::
    )

    # Generate and install the package config file that find_package() loads.
    configure_package_config_file(
        ${CMAKE_SOURCE_DIR}/cmake/templates/NeedOpenCV.cmake.in
        ${CMAKE_CURRENT_BINARY_DIR}/${project_name}Config.cmake
        INSTALL_DESTINATION ${CMAKE_INSTALL_DATADIR}/${project_name}/cmake
    )
    install(
        FILES ${CMAKE_CURRENT_BINARY_DIR}/${project_name}Config.cmake
        DESTINATION ${CMAKE_INSTALL_DATADIR}/${project_name}/cmake
    )
endfunction()
3. 使用方法
- 配置好工具链
- 把外部cmake的add_subdirectory(sub1)打开,注释掉它后面的内容,然后编译并安装这个lib以及头文件
- 我的两个cmake设定
# Install everything into <current binary dir>/install.
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/install")
# Search that same directory so find_package() can locate the installed package.
set(CMAKE_PREFIX_PATH "${CMAKE_CURRENT_BINARY_DIR}/install")
- 然后做与第二步相反的操作(重新注释掉add_subdirectory(sub1),恢复后面的内容),就能通过编译并且运行了,不会再出现xxx符号找不到的错误
4. 总结
静态链接的lib库:
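- 在编译这个lib的时候要使用 CUDA_SEPARABLE_COMPILATION ON(生成可重定位的设备代码)和 POSITION_INDEPENDENT_CODE ON,这样lib里的 __device__ 这些符号才会保留下来,留给后面的设备链接使用
- 在另外一个目标的设备代码(例如可执行文件里的 __global__ 核函数)调用这个lib里的 __device__ 函数时,需要设置 CUDA_RESOLVE_DEVICE_SYMBOLS ON,告诉nvcc在链接该目标时进行设备链接(解析设备符号)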