目录
背景介绍
本文基于pytorch 1.7 分析它是如何扩展cmake编译脚本,支持使用nvidia的cuda sdk编译cuda cu 源代码的原理
编译架构
从上图可以看到,CMakeLists.txt中使用了扩展的API cuda_add_library/cuda_add_executable 编译cu文件,在cmake目录下面则实现了这两个扩展API以达到使用cuda toolchain编译cu文件的目标
代码分析
FindCUDA.cmake
从架构图可以看到FindCUDA.cmake 是实现扩展API cuda_add_library 的主要文件,分析如下
# -- Creates an executable "cuda_target" which is made up of the files
# specified. All of the non CUDA C files are compiled using the standard
# build rules specified by CMAKE and the cuda files are compiled to object
# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is
# added automatically to include_directories(). Some standard CMake target
# calls can be used on the target after calling this macro
# (e.g. set_target_properties and target_link_libraries), but setting
# properties that adjust compilation flags will not affect code compiled by
# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
#
# CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
# -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
注释已经把这个api说明得很清楚了,cuda_add_executable 会用nvcc编译cuda files成为可执行文件,cuda_add_library则生成lib库
###############################################################################
###############################################################################
# ADD LIBRARY
###############################################################################
###############################################################################
# CUDA_ADD_LIBRARY(cuda_target file0 file1 ...
#                  [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...])
# Builds a library from a mix of ordinary and .cu sources: the .cu files are
# compiled to object files by nvcc (via CUDA_WRAP_SRCS) and those objects are
# then handed to a regular add_library() call together with the plain sources.
# NOTE: this is a macro, so the _sources/_options/_generated_files/link_file
# helper variables it sets leak into the caller's scope.
macro(CUDA_ADD_LIBRARY cuda_target)
CUDA_ADD_CUDA_INCLUDE_ONCE()
# Separate the sources from the options
CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
# Derive the shared/static flag (_cuda_shared_flag) from the keywords in
# ${ARGN} -- NOTE(review): the helper's body is not shown here; confirm its
# exact fallback behavior in FindCUDA.cmake.
CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN})
# Create custom commands and targets for each file.
CUDA_WRAP_SRCS( ${cuda_target} OBJ _generated_files ${_sources}
${_cmake_options} ${_cuda_shared_flag}
OPTIONS ${_options} )
# Compute the file name of the intermediate link file used for separable
# compilation.
CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")
# Add the library. The nvcc-produced objects, the original sources and the
# separable-compilation link file all become part of the target.
add_library(${cuda_target} ${_cmake_options}
${_generated_files}
${_sources}
${link_file}
)
# Add a link phase for the separable compilation if it has been enabled. If
# it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS
# variable will have been defined.
CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")
# Link the target against the CUDA runtime libraries discovered by FindCUDA.
target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_LIBRARIES}
)
if(CUDA_SEPARABLE_COMPILATION)
# Separable compilation additionally requires the device runtime library.
target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_cudadevrt_LIBRARY}
)
endif()
# We need to set the linker language based on what the expected generated file
# would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
set_target_properties(${cuda_target}
PROPERTIES
LINKER_LANGUAGE ${CUDA_C_OR_CXX}
)
endmacro()
CUDA_GET_SOURCES_AND_OPTIONS 提取出编译参数 ${_options} ${_cmake_options},编译的源码文件 ${_sources}
CUDA_BUILD_SHARED_LIBRARY 提取编译参数 ${_cuda_shared_flag}
CUDA_WRAP_SRCS 将源码调用nvcc 进行编译
注释:为每个要编译的源码在build目录下生成几个中间文件,其中最重要的是.cmake文件
# Set all of our file names. Make sure that whatever filenames that have
# generated_file_path in them get passed in through as a command line
# argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time
# instead of configure time.
# The nvcc-produced output file for this source.
set(generated_file "${generated_file_path}/${generated_file_basename}")
# Dependency bookkeeping files kept in the intermediate directory --
# NOTE(review): presumably .NVCC-depend holds raw nvcc dependency output and
# .depend its CMake-syntax form; confirm in the full FindCUDA.cmake.
set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend")
set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend")
set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
# Per-source build script: first configured as a .cmake.pre-gen file, then
# finalized as a .cmake script; the generator expression appends ".<CONFIG>"
# to the name whenever a configuration is set.
set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen")
set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$<BOOL:$<CONFIG>>:.$<CONFIG>>.cmake")
...
注释:这个.cmake 中间文件是由run_nvcc.cmake作为模板实例化里面的变量生成的
# Configure the build script
# configure_file() expands @VAR@ references in the run_nvcc.cmake template at
# configure time (@ONLY leaves ${...} untouched); file(GENERATE) then writes
# the final per-source script at generate time, evaluating any generator
# expressions remaining in the pre-gen file.
configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY)
file(GENERATE
OUTPUT "${custom_target_script}"
INPUT "${custom_target_script_pregen}"
)
...
注释:通过cmake built-in API add_custom_command 调用.cmake 中间文件,将源文件用nvcc编译成为期望的目标文件(比如 .o)
# Build the generated file and dependency file ##########################
# Rule that produces ${generated_file}: it runs cmake in script mode (-P) on
# the per-source script generated above, passing the output paths and
# verbosity in as -D definitions; the script in turn invokes nvcc.
add_custom_command(
OUTPUT ${generated_file}
# These output files depend on the source_file and the contents of cmake_dependency_file
${main_dep}
DEPENDS ${CUDA_NVCC_DEPEND}
DEPENDS ${custom_target_script}
# Make sure the output directory exists before trying to write to it.
COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
COMMAND ${CMAKE_COMMAND} ARGS
-D verbose:BOOL=${verbose_output}
${ccbin_flags}
-D build_configuration:STRING=${CUDA_build_configuration}
-D "generated_file:STRING=${generated_file}"
-D "generated_cubin_file:STRING=${generated_cubin_file}"
-P "${custom_target_script}"
WORKING_DIRECTORY "${cuda_compile_intermediate_directory}"
COMMENT "${cuda_build_comment_string}"
${_verbatim}
)
注释:将nvcc编译出来的目标文件链接成为期望的${cuda_target} so 文件
# Add the library.
# The nvcc-generated object files, the original sources and the
# separable-compilation link file are all handed to add_library().
add_library(${cuda_target} ${_cmake_options}
${_generated_files}
${_sources}
${link_file}
)
...
注释:link ${cuda_target} so 到其它依赖的cuda library (比如 libcudnn.so 等)
# Link the new target against the CUDA libraries found by FindCUDA.
target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_LIBRARIES}
)
至此完成了通过nvcc 编译cu源代码成为so动态库的过程
run_nvcc.cmake
该cmake作为模板文件,在FindCUDA.cmake中通过cmake built-in API configure_file 实例化为对应cu文件的中间脚本文件(参考上面的代码分析),用于编译该cu文件;
它的核心功能就是通过cmake built-in API execute_process执行nvcc编译的过程,生成目标文件;由于pytorch中的每个cu文件都会有对应的一个.cmake脚本(以run_nvcc.cmake作为模板生成),所以就可以达到在cmake编译pytorch的过程中所有cu文件被nvcc编译的目标
# cuda_execute_process(status COMMAND cmd args...)
# Helper used by the generated per-source scripts: optionally echoes <status>
# and the command line (when the script-mode variable `verbose` is set), then
# runs the command and stores its exit code in CUDA_result.
# NOTE: this is a macro, so _command / cuda_execute_process_string / arg /
# CUDA_result all leak into the caller's scope -- CUDA_result is presumably
# checked by the caller after this returns (caller not shown here).
macro(cuda_execute_process status command)
set(_command ${command})
# Enforce the calling convention: the literal keyword COMMAND must be the
# second argument (the "x" prefix guards the comparison against empty values).
if(NOT "x${_command}" STREQUAL "xCOMMAND")
message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})")
endif()
if(verbose)
execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status})
# Now we need to build up our command string. We are accounting for quotes
# and spaces, anything else is left up to the user to fix if they want to
# copy and paste a runnable command line.
set(cuda_execute_process_string)
foreach(arg ${ARGN})
# If there are quotes, escape them, so they come through.
string(REPLACE "\"" "\\\"" arg ${arg})
# Args with spaces need quotes around them to get them to be parsed as a single argument.
if(arg MATCHES " ")
list(APPEND cuda_execute_process_string "\"${arg}\"")
else()
list(APPEND cuda_execute_process_string ${arg})
endif()
endforeach()
# Echo the command
execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string})
endif()
# Run the command
execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result )
endmacro()
...
注释:通过cmake built-in API execute_process执行nvcc编译的过程,生成目标文件
# Generate the code
# Invoke nvcc on the .cu source to produce ${generated_file} (e.g. an object
# file; ${format_flag} selects the output kind). All the flag variables below
# were baked into this script when FindCUDA.cmake configured the
# run_nvcc.cmake template via configure_file().
cuda_execute_process(
"Generating ${generated_file}"
COMMAND "${CUDA_NVCC_EXECUTABLE}"
"${source_file}"
${cuda_language_flag}
${format_flag} -o "${generated_file}"
${CCBIN}
${nvcc_flags}
${nvcc_host_compiler_flags}
${CUDA_NVCC_FLAGS}
-DNVCC
${CUDA_NVCC_INCLUDE_ARGS}
)