目录
背景介绍
本文基于pytorch 1.7 分析它是如何扩展cmake编译脚本,支持使用nvidia的cuda sdk编译cuda cu 源代码的原理
编译架构
从上图可以看到,CMakeLists.txt中使用了扩展的API cuda_add_library/cuda_add_executable 编译cu文件,在cmake目录下面则实现了这两个扩展API以达到使用cuda toolchain编译cu文件的目标
代码分析
FindCUDA.cmake
从架构图可以看到FindCUDA.cmake 是实现扩展API cuda_add_library 的主要文件,分析如下
# -- Creates an executable "cuda_target" which is made up of the files
# specified. All of the non CUDA C files are compiled using the standard
# build rules specified by CMAKE and the cuda files are compiled to object
# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is
# added automatically to include_directories(). Some standard CMake target
# calls can be used on the target after calling this macro
# (e.g. set_target_properties and target_link_libraries), but setting
# properties that adjust compilation flags will not affect code compiled by
# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
#
# CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
# -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
注释已经把这个api说明得很清楚了,cuda_add_executable 会用nvcc编译cuda files成为可执行文件,cuda_add_library则生成lib库
###############################################################################
###############################################################################
# ADD LIBRARY
###############################################################################
###############################################################################
# CUDA_ADD_LIBRARY(cuda_target file0 file1 ...
#                  [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...])
# Builds a library from a mix of ordinary and .cu sources: the .cu files are
# compiled to object files by nvcc (via CUDA_WRAP_SRCS) and those objects are
# then handed to a regular add_library() call together with the plain sources.
# NOTE: this is a macro, so the _sources/_options/_generated_files/link_file
# helper variables it sets leak into the caller's scope.
macro(CUDA_ADD_LIBRARY cuda_target)
CUDA_ADD_CUDA_INCLUDE_ONCE()
# Separate the sources from the options
CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
# Derive the shared/static flag (_cuda_shared_flag) from the keywords in
# ${ARGN} -- NOTE(review): the helper's body is not shown here; confirm its
# exact fallback behavior in FindCUDA.cmake.
CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN})
# Create custom commands and targets for each file.
CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources}
${_cmake_options} ${_cuda_shared_flag}
OPTIONS ${_options} )
# Compute the file name of the intermediate link file used for separable
# compilation.
CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")
# Add the library. The nvcc-produced objects, the original sources and the
# separable-compilation link file all become part of the target.
add_library(${cuda_target} ${_cmake_options}
${_generated_files}
${_sources}
${link_file}
)
# Add a link phase for the separable compilation if it has been enabled. If
# it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS
# variable will have been defined.
CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")
# Link the target against the CUDA runtime libraries discovered by FindCUDA.
target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_LIBRARIES}
)
if(CUDA_SEPARABLE_COMPILATION)
# Separable compilation additionally requires the device runtime library.
target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_cudadevrt_LIBRARY}
)
endif()
# We need to set the linker language based on what the expected generated file
# would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
set_target_properties(${cuda_target}
PROPERTIES
LINKER_LANGUAGE ${CUDA_C_OR_CXX}
)
endmacro()
CUDA_GET_SOURCES_AND_OPTIONS 提取出编译参数 ${_options} ${_cmake_options},编译的源码文件 ${_sources}
CUDA_BUILD_SHARED_LIBRARY 提取编译参数 ${_cuda_shared_flag}
CUDA_WRAP_SRCS 将源码调用nvcc 进行编译
注释:为每个要编译的源码在build目录下生成几个中间文件,其中最重要的是.cmake文件
# Set all of our file names. Make sure that whatever filenames that have
# generated_file_path in them get passed in through as a command line
# argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time
# instead of configure time.
# The nvcc-produced output file for this source.
set(generated_file "${generated_file_path}/${generated_file_basename}")
# Dependency bookkeeping files kept in the intermediate directory --
# NOTE(review): presumably .NVCC-depend holds raw nvcc dependency output and
# .depend its CMake-syntax form; confirm in the full FindCUDA.cmake.
set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend")
set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend")
set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
# Per-source build script: first configured as a .cmake.pre-gen file, then
# finalized as a .cmake script; the generator expression appends ".<CONFIG>"
# to the name whenever a configuration is set.
set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen")
set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$<BOOL:$<CONFIG>>:.$<CONFIG>>.cmake")
...
注释:这个.cmake 中间文件是由run_nvcc.cmake作为模板实例化里面的变量生成的
# Configure the build script
# configure_file() expands @VAR@ references in the run_nvcc.cmake template at
# configure time (@ONLY leaves ${...} untouched); file(GENERATE) then writes
# the final per-source script at generate time, evaluating any generator
# expressions remaining in the pre-gen file.
configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY)
file(GENERATE
OUTPUT "${custom_target_script}"
INPUT "${custom_target_script_pregen}"
)
...
注释:通过cmake built-in API add_custom_command 调用.cmake 中间文件,将源文件用nvcc编译成为期望的目标文件(比如 .o)
# Build the generated file and dependency file ##########################
# Rule that produces ${generated_file}: it runs cmake in script mode (-P) on
# the per-source script generated above, passing the output paths and
# verbosity in as -D definitions; the script in turn invokes nvcc.
add_custom_command(
OUTPUT ${generated_file}
# These output files depend on the source_file and the contents of cmake_dependency_file
${main_dep}
DEPENDS ${CUDA_NVCC_DEPEND}
DEPENDS ${custom_target_script}
# Make sure the output directory exists before trying to write to it.
COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
COMMAND ${CMAKE_COMMAND} ARGS
-D verbose:BOOL=${verbose_output}
${ccbin_flags}
-D build_configuration:STRING=${CUDA_build_configuration}
-D "generated_file:STRING=${generated_file}"
-D "generated_cubin_file:STRING=${generated_cubin_file}"
-P "${custom_target_script}"
WORKING_DIRECTORY "${cuda_compile_intermediate_directory}"
COMMENT "${cuda_build_comment_string}"
${_verbatim}
)
注释:将nvcc编译出来的目标文件链接成为期望的${cuda_target} so 文件
# Add the library.
# The nvcc-generated object files, the original sources and the
# separable-compilation link file are all handed to add_library().
add_library(${cuda_target} ${_cmake_options}
${_generated_files}
${_sources}
${link_file}
)
...
注释:link ${cuda_target} so 到其它依赖的cuda library (比如 libcudnn.so 等)
# Link the new target against the CUDA libraries found by FindCUDA.
target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_LIBRARIES}
)
至此完成了通过nvcc 编译cu源代码成为so动态库的过程
run_nvcc.cmake
该cmake作为模板文件,在FindCUDA.cmake中通过cmake built-in API configure_file 实例化为对应cu文件的中间脚本文件(参考上面的代码分析),用于编译该cu文件;
它的核心功能就是通过cmake built-in API execute_process执行nvcc编译的过程,生成目标文件;由于pytorch中的每个cu文件都会有对应的一个.cmake脚本(以run_nvcc.cmake作为模板生成),所以就可以达到在cmake编译pytorch的过程中所有cu文件被nvcc编译的目标
# cuda_execute_process(status COMMAND cmd args...)
# Helper used by the generated per-source scripts: optionally echoes <status>
# and the command line (when the script-mode variable `verbose` is set), then
# runs the command and stores its exit code in CUDA_result.
# NOTE: this is a macro, so _command / cuda_execute_process_string / arg /
# CUDA_result all leak into the caller's scope -- CUDA_result is presumably
# checked by the caller after this returns (caller not shown here).
macro(cuda_execute_process status command)
set(_command ${command})
# Enforce the calling convention: the literal keyword COMMAND must be the
# second argument (the "x" prefix guards the comparison against empty values).
if(NOT "x${_command}" STREQUAL "xCOMMAND")
message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})")
endif()
if(verbose)
execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
# Now we need to build up our command string. We are accounting for quotes
# and spaces, anything else is left up to the user to fix if they want to
# copy and paste a runnable command line.
set(cuda_execute_process_string)
foreach(arg ${ARGN})
# If there are quotes, escape them, so they come through.
string(REPLACE "\"" "\\\"" arg ${arg})
# Args with spaces need quotes around them to get them to be parsed as a single argument.
if(arg MATCHES " ")
list(APPEND cuda_execute_process_string "\"${arg}\"")
else()
list(APPEND cuda_execute_process_string ${arg})
endif()
endforeach()
# Echo the command
execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
endif()
# Run the command
execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
endmacro()
...
注释:通过cmake built-in API execute_process执行nvcc编译的过程,生成目标文件
# Generate the code
# Invoke nvcc on the .cu source to produce ${generated_file} (e.g. an object
# file; ${format_flag} selects the output kind). All the flag variables below
# were baked into this script when FindCUDA.cmake configured the
# run_nvcc.cmake template via configure_file().
cuda_execute_process(
"Generating ${generated_file}"
COMMAND "${CUDA_NVCC_EXECUTABLE}"
"${source_file}"
${cuda_language_flag}
${format_flag} -o "${generated_file}"
${CCBIN}
${nvcc_flags}
${nvcc_host_compiler_flags}
${CUDA_NVCC_FLAGS}
-DNVCC
${CUDA_NVCC_INCLUDE_ARGS}
)