qtCreator+MSVC2017编译器+CUDA混合编程的三种方法
尝试在Qt中实现CUDA的混合编程,因为CUDA不支持MinGW编译器,因此只能使用VS编译器,于是选用MSVC2017编译器。
下面介绍的三种方法本质上是同一种思路:核心在于对CUDA部分的代码使用NVCC编译器,而对其余代码使用VS2017(MSVC)编译器。区别仅在于,CUDA代码的编译结果以何种方式被VS2017编译的主程序调用。
一、准备好工程文件
(一)Qt中创建一个工程:
(二)cuda程序内容
bilinear.cu
#include "bilinear.h"

// Kernel: each launched thread prints one greeting via device-side printf.
// extern "C" applies to this kernel only, giving it an unmangled symbol name.
extern "C"
__global__ void hellofromGPU(void)
{
    printf("GPU:hello sunyi\n");
}

// Host wrapper declared in bilinear.h and called from main.cpp.
// Launches hellofromGPU with 1 block of 10 threads and waits for it to finish.
// Launch and execution errors are reported on stderr instead of being dropped,
// so a failed launch no longer looks like "nothing was printed".
void showhello(void)
{
    hellofromGPU <<<1,10>>>();

    // A kernel launch returns no status itself; the launch error is fetched separately.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "hellofromGPU launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Wait for the kernel to finish and pick up any error raised while it ran.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(err));
    }
}
(三)cuda程序的头文件内容
bilinear.h
// Shared header included by both bilinear.cu and main.cpp.
#ifndef BILINEAR_H
#define BILINEAR_H
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include "malloc.h"
// NOTE(review): the constants below are not referenced by the code shown with
// this header; presumably they size a bilinear-interpolation grid and its
// thread blocks — confirm before relying on them.
#define WIDTH 11
#define HEIGHT 10
#define X_INTER 3
#define Y_INTER 3
#define BLOCK_SIZE 8
// Host-callable wrapper defined in bilinear.cu: launches the hello kernel and
// waits for it to complete.
void showhello(void);
#endif // BILINEAR_H
(四)主程序入口内容
main.cpp
#include<stdio.h>
#include "bilinear.h"

// Entry point: runs the CUDA hello demo, then keeps the console window open
// until the user presses Enter.
int main(void)
{
    showhello();
    // Block on stdin instead of spinning in `while(1);`, which pins a CPU core
    // and never lets the process exit normally.
    getchar();
    return 0;
}
// Example architecture flag, matching CUDA_ARCH in testCUDA.pro: nvcc -arch=sm_61
二、混合编程的三种方法
(一)使用NVCC编译器预编译生成*.obj后在Qt工程文档中调用
-
将CUDA的include、lib、bin目录写入环境变量。如果是默认的安装目录,应该是在C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6的目录下。
-
使用VS2017自带的命令行工具"适用于VS 2017的x64本机工具命令提示"(英文版中名为 x64 Native Tools Command Prompt for VS 2017;如果不确定是哪一个,可以将几个命令行工具挨个试一下)。打开后切换到bilinear.cu所在的目录。
-
输入
nvcc -Xcompiler "/MDd" -D_DEBUG -c bilinear.cu
,则会生成bilinear.obj。
-
而后在testCUDA.pro中添加
OBJECTS += bilinear.obj
,就可以正常编译了。
-
将bilinear.obj放到编译目录下的debug目录中。
(二)在pro文件中添加额外编译器nvcc和主程序同时编译
这个方法不需要事先手动生成bilinear.obj文件,但在pro文件中添加额外编译器nvcc的目的同样是生成bilinear.obj,原理是一样的,只是将生成obj的命令写入pro文件中,由构建过程在编译主程序之前先调用nvcc生成bilinear.obj。
testCUDA.pro
QT -= gui
CONFIG += c++11 console
CONFIG -= app_bundle

# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0

SOURCES += \
    main.cpp

# ---- CUDA settings ----
# .cu files are kept out of SOURCES so that only nvcc (the extra compiler
# defined below) compiles them.
CUDA_SOURCES += bilinear.cu
CUDA_DIR = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6"
SYSTEM_NAME = x64
SYSTEM_TYPE = 64
# Target GPU architecture passed to nvcc via -arch.
CUDA_ARCH = sm_61
NVCC_OPTIONS = --use_fast_math

# Header search path (used by both cl and nvcc)
INCLUDEPATH += "$$CUDA_DIR/include"
# Import library search path
QMAKE_LIBDIR += "$$CUDA_DIR/lib/x64"
# Turn every INCLUDEPATH entry into a quoted -I"..." option for nvcc.
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')

# Libraries to link. No trailing backslash after the last entry: a dangling
# continuation would splice the next statement into this list.
CUDA_LIB_NAMES += \
    cuda \
    cudadevrt \
    cudart

CONFIG(debug, debug|release) {
    CUDA_LIB_NAMES += ucrtd
} else {
    CUDA_LIB_NAMES += ucrt
}

for(lib, CUDA_LIB_NAMES) {
    NVCC_LIBS += -l$$lib
}
LIBS += $$NVCC_LIBS

# The MSVC runtime flag given to nvcc's host compiler must match the one qmake
# gives to cl for the rest of the project.
MSVCRT_LINK_FLAG_DEBUG = "/MDd"
MSVCRT_LINK_FLAG_RELEASE = "/MD"

# Extra compiler: qmake emits a Makefile rule that runs nvcc on each file in
# CUDA_SOURCES and produces the matching .obj before linking.
# The nvcc path is quoted because CUDA_DIR contains spaces.
CONFIG(debug, debug|release) {
    # Debug build
    OBJECTS_DIR = debug/obj
    CUDA_OBJECTS_DIR = debug
    cuda_d.input = CUDA_SOURCES
    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}.obj
    cuda_d.commands = \"$$CUDA_DIR/bin/nvcc.exe\" -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$LIBS \
                      --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
                      -cudart static -g -DWIN32 -D_MBCS \
                      -Xcompiler "/wd4819,/EHsc,/W3,/nologo,/Od,/Zi,/RTC1" \
                      -Xcompiler $$MSVCRT_LINK_FLAG_DEBUG \
                      -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    cuda_d.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda_d
} else {
    # Release build
    OBJECTS_DIR = release/obj
    CUDA_OBJECTS_DIR = release
    cuda.input = CUDA_SOURCES
    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}.obj
    # /O2 is the MSVC speed optimisation switch; cl has no /O3.
    cuda.commands = \"$$CUDA_DIR/bin/nvcc.exe\" $$NVCC_OPTIONS $$CUDA_INC $$LIBS \
                    --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
                    -cudart static -D_MBCS \
                    -Xcompiler "/wd4819,/EHsc,/W3,/nologo,/O2,/Zi" \
                    -Xcompiler $$MSVCRT_LINK_FLAG_RELEASE \
                    -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    cuda.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda
}

HEADERS += \
    bilinear.h

DISTFILES += \
    bilinear.cu
注意! 这个方法,我在Windows Server 2019服务器上使用时,不知道为什么,一直没有编译生成bilinear.obj文件,试了很久都没成功;但是换了一台Windows 10的电脑,使用几乎一样的工具,一遍就编译通过了。所以如果你也遇到了类似的问题,可以换台电脑、换个系统试试。
正常编译输出内容如下,仅供参考:
12:00:27: 为项目testCUDA执行步骤 ...
12:00:27: 配置没有改变, 跳过 qmake 步骤。
12:00:28: 正在启动 "C:\Qt\Qt5.12.12\Tools\QtCreator\bin\jom\jom.exe"
C:\Qt\Qt5.12.12\Tools\QtCreator\bin\jom\jom.exe -f Makefile.Debug
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6/bin/nvcc.exe -D_DEBUG --use_fast_math -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6/include" -lcuda -lcudadevrt -lcudart -lucrtd --machine 64 -arch=sm_61 --compile -cudart static -g -DWIN32 -D_MBCS -Xcompiler /wd4819,/EHsc,/W3,/nologo,/Od,/Zi,/RTC1 -Xcompiler /MDd -c -o debug\bilinear.obj ..\testCUDA\bilinear.cu
cl -c -nologo -Zc:wchar_t -FS -Zc:rvalueCast -Zc:inline -Zc:strictStrings -Zc:throwingNew -Zc:referenceBinding -Zc:__cplusplus -Zi -MDd -W3 -w34100 -w34189 -w44996 -w44456 -w44457 -w44458 -wd4577 -wd4467 -EHsc /Fddebug\obj\testCUDA.vc.pdb -DUNICODE -D_UNICODE -DWIN32 -D_ENABLE_EXTENDED_ALIGNED_STORAGE -DWIN64 -DQT_QML_DEBUG -DQT_CORE_LIB -I..\testCUDA -I. -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include" -IC:\Qt\Qt5.12.12\5.12.12\msvc2017_64\include -IC:\Qt\Qt5.12.12\5.12.12\msvc2017_64\include\QtCore -Idebug -IC:\Qt\Qt5.12.12\5.12.12\msvc2017_64\mkspecs\win32-msvc -Fodebug\obj\ @C:\Users\ADMINI~1\AppData\Local\Temp\main.obj.31692.343.jom
main.cpp
bilinear.cu
link /NOLOGO /DYNAMICBASE /NXCOMPAT /DEBUG /SUBSYSTEM:CONSOLE "/MANIFESTDEPENDENCY:type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' publicKeyToken='6595b64144ccf1df' language='*' processorArchitecture='*'" /MANIFEST:embed /OUT:debug\testCUDA.exe @C:\Users\ADMINI~1\AppData\Local\Temp\testCUDA.exe.31692.12781.jom
12:00:42: 进程"C:\Qt\Qt5.12.12\Tools\QtCreator\bin\jom\jom.exe"正常退出。
12:00:42: Elapsed time: 00:15.
把Makefile.Debug中的部分内容也贴一下,仅供参考:
compiler_cuda_d_make_all: debug\bilinear.obj
compiler_cuda_d_clean:
-$(DEL_FILE) debug\bilinear.obj
debug\bilinear.obj: ..\testCUDA\bilinear.cu \
..\testCUDA\bilinear.h \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\cuda_runtime.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\host_config.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\builtin_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\device_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\host_defines.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\driver_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\vector_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\surface_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\texture_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\library_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\channel_descriptor.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\cuda_runtime_api.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\cuda_device_runtime_api.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\driver_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\vector_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\vector_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\device_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\device_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\device_atomic_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\device_atomic_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\device_double_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\device_double_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_20_atomic_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_20_atomic_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_32_atomic_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_32_atomic_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_35_atomic_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_60_atomic_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_60_atomic_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_20_intrinsics.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_20_intrinsics.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_30_intrinsics.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_30_intrinsics.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_32_intrinsics.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_32_intrinsics.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_35_intrinsics.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_61_intrinsics.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\sm_61_intrinsics.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\sm_70_rt.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\sm_70_rt.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\sm_80_rt.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\sm_80_rt.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\surface_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\cuda_surface_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\texture_fetch_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\cuda_texture_types.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\texture_indirect_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\surface_indirect_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\common_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\math_functions.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\func_macro.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\crt\math_functions.hpp" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\math_constants.h" \
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include\device_launch_parameters.h"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6/bin/nvcc.exe -D_DEBUG --use_fast_math -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6/include" -lcuda -lcudadevrt -lcudart -lucrtd --machine 64 -arch=sm_61 --compile -cudart static -g -DWIN32 -D_MBCS -Xcompiler /wd4819,/EHsc,/W3,/nologo,/Od,/Zi,/RTC1 -Xcompiler /MDd -c -o debug\bilinear.obj ..\testCUDA\bilinear.cu
(三)将cu文件封装为库文件,而后在工程文件中引用
这个方法我具体没有测试,但是理论上跟方法一应该是同个道理,只是方法一是将bilinear.cu文件编译成了bilinear.obj文件,在这个方法中,是将bilinear.cu封装成bilinear.lib或者bilinear.dll文件,而后调用。是一个道理。
在这个方法中,在编译完成后,应该至少生成两个文件。
bilinear.h和bilinear.lib。或者还有一个bilinear.dll。
具体的我就不试了,但是想想应该是简单的。
包括在openCV中调用到CUDA的内容,应该实质上也是类似于此的方法,因此调用openCV的库文件,并不需要自己解决CUDA的配置问题。
三、总结
- 千万记住CUDA的编译暂不支持MinGW,所以别在这方面浪费时间了。
- 如果生成obj文件失败,可以换个系统试试。根据我的观察,我在之前那个系统中一直生成失败,是因为在之前那个系统中,压根就没启用cmd命令行来调用额外编译器nvcc,具体原因暂时不知道。
- 不同显卡应填写的sm值可以在英伟达官网查到:比如我的GTX1060 3G对应的计算能力(Compute Capability)是6.1,则填写sm_61。查询GPU算力的官网:https://developer.nvidia.com/cuda-gpus