nvcc编译选项

yuzyoong

已于 2024-09-14 08:40:58 修改

阅读量701

点赞数 15

文章标签： linux

于 2024-09-10 16:21:17 首次发布

本文链接：https://blog.csdn.net/yu916618373/article/details/142102180

版权

文章目录

一、nvcc介绍

官方文档: https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html

nvcc编译选项

Option		description
Long Name	Short Name
--cuda	-cuda	将.cu文件编译为.cu.cpp.ii文件：.cpp.ii附加到源文件名后，如x.cu.cpp.ii。此输出文件可以由nvcc用于预处理.cu文件的宿主编译器编译。
--compile	-c	编译生成object文件. 源文件名，后缀在Linux上替换为o，在Windows上替换为obj
--cubin	-cubin	将.cu/.ptx文件编译为仅含设备代码的.cubin文件
--ptx	-ptx	将.cu文件编译为仅含设备代码的.ptx文件
--fatbin	-fatbin	将.cu/.ptx/.cubin文件编译为仅含设备代码的.fatbin文件
--device-link	-dlink	链接object文件. Windows上的a_dlink.obj或其他平台上的a_dlink.o
--device-link --cubin	-dlink -cubin	链接cubin文件
--device-link --fatbin	-dlink -fatbin	链接fatbin文件
--lib	-lib	将输入文件编译为object文件并打包成库文件：Windows上的.lib或其他平台上的.a
--ccbin	-ccbin	（常用）指定宿主编译器（如g++）所在目录或可执行文件路径，用它生成cpu上的可执行文件，可执行文件中包含fatbin（作为elf文件中的一个段）

cuda基础概念

cubin
是在gpu上运行的elf文件, 里面包含gpu的sass指令.
fatbin
包含ptx和cubin.
ptx
是Parallel Thread Execution的缩写，是一种虚拟指令集（中间表示），可在运行时由驱动JIT编译，可以用来兼容下一代sass指令的执行。

二、编译例子

普通编译(编译生成sm_86的sass和ptx)

nvcc -ccbin /usr/bin/g++-10 hello.cu --cudart shared -gencode arch=compute_86,code=compute_86 -gencode arch=compute_86,code=sm_86 -g -o hello.cu.out

编译选项	col2	col3
-ccbin /usr/bin/g++-10	使用g++-10编译
hello.cu	要编译的目标文件
--cudart shared	动态链接libcudart.so
-gencode arch=compute_86,code=compute_86	生成ptx，版本为compute_86 (若需要生成多个版本，这里可以指定多个)
-gencode arch=compute_86,code=sm_86	生成sass指令，版本为sm_86 (若需要生成多个版本，这里可以指定多个)
-g	带调试信息 (类似这些与gcc/g++编译参数一致, 包括链接/头文件路径导入等等 -l -L -I etc.)

编译保留中间文件
--verbose --keep : 保留编译过程的中间文件(如ptx、cubin、fatbin)，并附带详细编译信息。

$ nvcc -ccbin /usr/bin/g++-10 hello.cu --cudart shared -gencode arch=compute_86,code=compute_86 -gencode arch=compute_86,code=sm_86 -g -o hello.cu.out --verbose --keep
#$ _NVVM_BRANCH_=nvvm
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda-11.8/bin
#$ _THERE_=/usr/local/cuda-11.8/bin
#$ _TARGET_SIZE_=
#$ _TARGET_DIR_=
#$ _TARGET_DIR_=targets/x86_64-linux
#$ TOP=/usr/local/cuda-11.8/bin/..
#$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-11.8/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/usr/local/cuda-11.8/bin/../lib:
#$ PATH=/usr/local/cuda-11.8/bin/../nvvm/bin:/usr/local/cuda-11.8/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
#$ INCLUDES="-I/usr/local/cuda-11.8/bin/../targets/x86_64-linux/include"
#$ LIBRARIES=  "-L/usr/local/cuda-11.8/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/x86_64-linux/lib"
#$ CUDAFE_FLAGS=
#$ PTXAS_FLAGS=
#$ rm hello.cu_dlink.reg.c
#$ "/usr/bin"/g++-10 -D__CUDA_ARCH__=860 -D__CUDA_ARCH_LIST__=860 -E -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDACC__ -D__NVCC__  "-I/usr/local/cuda-11.8/bin/../targets/x86_64-linux/include"    -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=8 -D__CUDACC_VER_BUILD__=89 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=8 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 -g "hello.cu" -o "hello.cpp1.ii"
#$ cicc --c++14 --gnu_version=100500 --display_error_number --orig_src_file_name "hello.cu" --orig_src_path_name "/home/yuzhiyong/mywork/gitee/hello_samples/cuda_samples/Level-1/cuda-hello/tmp/hello.cu" --allow_managed   -arch compute_86 -m64 --no-version-ident -ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 --include_file_name "hello.fatbin.c" -tused --gen_module_id_file --module_id_file_name "hello.module_id" --gen_c_file_name "hello.cudafe1.c" --stub_file_name "hello.cudafe1.stub.c" --gen_device_file_name "hello.cudafe1.gpu"  "hello.cpp1.ii" -o "hello.ptx"
#$ ptxas -arch=sm_86 -m64  "hello.ptx"  -o "hello.sm_86.cubin"
#$ fatbinary --create="hello.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " "--image3=kind=elf,sm=86,file=hello.sm_86.cubin" "--image3=kind=ptx,sm=86,file=hello.ptx" --embedded-fatbin="hello.fatbin.c"
#$ "/usr/bin"/g++-10 -D__CUDA_ARCH_LIST__=860 -E -x c++ -D__CUDACC__ -D__NVCC__  "-I/usr/local/cuda-11.8/bin/../targets/x86_64-linux/include"    -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=8 -D__CUDACC_VER_BUILD__=89 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=8 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -include "cuda_runtime.h" -m64 -g "hello.cu" -o "hello.cpp4.ii"
#$ cudafe++ --c++14 --gnu_version=100500 --display_error_number --orig_src_file_name "hello.cu" --orig_src_path_name "/home/yuzhiyong/mywork/gitee/hello_samples/cuda_samples/Level-1/cuda-hello/tmp/hello.cu" --allow_managed  --m64 --parse_templates --gen_c_file_name "hello.cudafe1.cpp" --stub_file_name "hello.cudafe1.stub.c" --module_id_file_name "hello.module_id" "hello.cpp4.ii"
#$ "/usr/bin"/g++-10 -D__CUDA_ARCH__=860 -D__CUDA_ARCH_LIST__=860 -c -x c++  -DCUDA_DOUBLE_MATH_FUNCTIONS "-I/usr/local/cuda-11.8/bin/../targets/x86_64-linux/include"   -m64 -g "hello.cudafe1.cpp" -o "hello.o"
#$ nvlink -m64 --arch=sm_86 --register-link-binaries="hello.cu_dlink.reg.c"    "-L/usr/local/cuda-11.8/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/x86_64-linux/lib" -cpu-arch=X86_64 "hello.o"  -lcudadevrt  -o "hello.cu_dlink.sm_86.cubin" --host-ccbin "/usr/bin/g++-10"
#$ fatbinary --create="hello.cu_dlink.fatbin" -64 --cicc-cmdline="-ftz=0 -prec_div=1 -prec_sqrt=1 -fmad=1 " -link "--image3=kind=elf,sm=86,file=hello.cu_dlink.sm_86.cubin" --embedded-fatbin="hello.cu_dlink.fatbin.c"
#$ "/usr/bin"/g++-10 -D__CUDA_ARCH_LIST__=860 -c -x c++ -DFATBINFILE="\"hello.cu_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"hello.cu_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__  "-I/usr/local/cuda-11.8/bin/../targets/x86_64-linux/include"    -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=8 -D__CUDACC_VER_BUILD__=89 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=8 -D__NVCC_DIAG_PRAGMA_SUPPORT__=1 -m64 -g "/usr/local/cuda-11.8/bin/crt/link.stub" -o "hello.cu_dlink.o"
#$ "/usr/bin"/g++-10 -D__CUDA_ARCH_LIST__=860 -m64 -g -Wl,--start-group "hello.cu_dlink.o" "hello.o"   "-L/usr/local/cuda-11.8/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-11.8/bin/../targets/x86_64-linux/lib"  -lcudadevrt  -lcudart  -Wl,--end-group -o "hello.cu.out"

查看编译结果
只有hello.cu.out是我们的目标文件，其他都是中间临时文件。

$ ls
hello.cpp1.ii  hello.cu.out             hello.cu_dlink.o            hello.cudafe1.c    hello.cudafe1.stub.c  hello.module_id  hello.sm_86.cubin
hello.cpp4.ii  hello.cu_dlink.fatbin    hello.cu_dlink.reg.c        hello.cudafe1.cpp  hello.fatbin          hello.o
hello.cu       hello.cu_dlink.fatbin.c  hello.cu_dlink.sm_86.cubin  hello.cudafe1.gpu  hello.fatbin.c        hello.ptx

三、其他编译

编译生成cubin文件

cubin是指在gpu上运行的elf文件，里面包含GPU的sass指令。(无法在CPU上执行)

# 编译命令
nvcc -cubin hello.cu -o hello.cubin
# 编译指定版本的sass指令
nvcc -cubin -arch=sm_86 hello.cu -o hello.cubin

编译生成ptx文件

  nvcc -ptx hello.cu -arch=sm_86 -o hello.ptx
  # 或
  nvcc -gencode arch=compute_75,code=sm_75 -ptx -o myprogram.ptx myprogram.cu

将ptx文件编译为cubin文件

  ptxas -arch=sm_86 -o hello.cubin hello.ptx

yuzyoong

关注

15
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫