1 实验环境
Ubuntu 16.04 + Qt5 + CUDA9.0
2 实例
- CUDA配置全部放在.pro文件中
- main.cpp 包含主函数,使用 g++ 编译
- kernel.cu 包含核函数,使用 nvcc 编译
2.1 kernel.cu
// kernel.cu
#include <cuda_runtime.h>
#include <stdio.h>
// Forward declaration of the host entry point defined further down.
// C linkage keeps the symbol name unmangled and matches the declaration
// in kernel.h that main.cpp links against.
extern "C" void runCudaPart();
/*
 * Element-wise, in-place addition on the device: ary1[i] += ary2[i].
 *
 * Expected launch layout: a 1-D grid of 1-D blocks whose total thread count
 * (gridDim.x * blockDim.x) equals the number of elements. The kernel takes no
 * length argument, so it cannot bounds-check; launching more threads than
 * there are elements accesses memory out of range.
 *
 * ary1  device pointer, read and written (receives the sums)
 * ary2  device pointer, read only
 */
__global__ void addAry(int *ary1, int *ary2)
{
    // Use the global thread index. threadIdx.x alone restarts at 0 in every
    // block, so with a multi-block launch all blocks would race on the first
    // blockDim.x elements and the remaining elements would never be touched.
    int indx = blockIdx.x * blockDim.x + threadIdx.x;
    ary1[indx] += ary2[indx];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Main cuda function
//
// Host-side driver of the demo: fills two N-element arrays (i and 2*i),
// adds them on the GPU with addAry and prints every element of the result
// to stdout. If any CUDA call fails, a diagnostic goes to stderr, the
// results are not printed, and the device buffers are still released.
void runCudaPart()
{
    const int N = 32;                      // number of elements processed
    const size_t bytes = N * sizeof(int);  // size of one array in bytes

    int ary1[N];
    int ary2[N];
    int res[N];
    for (int i = 0; i < N; i++)
    {
        ary1[i] = i;
        ary2[i] = 2 * i;
        res[i] = 0;
    }

    // Start from NULL so the cudaFree calls at the end are harmless even
    // when an allocation failed (cudaFree of a null pointer does nothing).
    int *dev_ary1 = NULL;
    int *dev_ary2 = NULL;

    /* Allocate GPU buffers and copy the input vectors from host memory */
    bool ok = cudaSucceeded(cudaMalloc((void**)&dev_ary1, bytes), "cudaMalloc(dev_ary1)")
           && cudaSucceeded(cudaMalloc((void**)&dev_ary2, bytes), "cudaMalloc(dev_ary2)")
           && cudaSucceeded(cudaMemcpy(dev_ary1, ary1, bytes, cudaMemcpyHostToDevice),
                            "cudaMemcpy(ary1 -> device)")
           && cudaSucceeded(cudaMemcpy(dev_ary2, ary2, bytes, cudaMemcpyHostToDevice),
                            "cudaMemcpy(ary2 -> device)");

    if (ok)
    {
        /* Launch one block of N threads, i.e. exactly one thread per element.
           With a single block, threadIdx.x by itself identifies each element
           uniquely, so no element is skipped or updated more than once. */
        addAry<<<1, N>>>(dev_ary1, dev_ary2);

        /* A launch returns no status: cudaGetLastError reports an invalid
           launch, cudaDeviceSynchronize waits for the kernel to finish and
           reports errors raised while it ran. (cudaThreadSynchronize, used
           previously, is deprecated.) */
        ok = cudaSucceeded(cudaGetLastError(), "addAry launch")
          && cudaSucceeded(cudaDeviceSynchronize(), "addAry execution")
          /* Copy output vector from GPU buffer to host memory */
          && cudaSucceeded(cudaMemcpy(res, dev_ary1, bytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(device -> res)");
    }

    if (ok)
    {
        for (int i = 0; i < N; i++)
            printf( "result[%d] = %d\n", i, res[i]);
    }

    cudaFree(dev_ary1);
    cudaFree(dev_ary2);
}
2.2 kernel.h
// kernel.h
#ifndef KERNEL_H
#define KERNEL_H

/*
 * Public interface of the CUDA side of the demo (implemented in kernel.cu).
 * The linkage specification is only emitted for C++ compilers, so the header
 * can be included from both C and C++ translation units.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* Runs the GPU array-addition demo and prints the result to stdout. */
void runCudaPart(void);

#ifdef __cplusplus
}
#endif

#endif // KERNEL_H
2.3 main.cpp
#include <iostream>
#include "kernel.h"
// Program entry point: prints a banner, runs the CUDA demo, prints a footer.
int main()
{
    const char *const banner = "--- This is a demo:";
    const char *const footer = "--- Ending ... ";

    // std::endl flushes std::cout, so the banner reaches the terminal
    // before the printf output produced inside runCudaPart().
    std::cout << banner << std::endl;

    runCudaPart();

    std::cout << footer << std::endl;
    return 0;
}
2.4 pro文件的配置
# qmake project for the CUDA demo: main.cpp is built by the regular C++
# compiler, kernel.cu is built by nvcc through an extra compiler definition.
TEMPLATE = app
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt
QT += core
QT -= gui
TARGET = cuda-demo

# Specify the directory where the generated application is placed
#DESTDIR += ./

SOURCES += main.cpp
HEADERS += \
    kernel.h

# Object files of the C++ and of the CUDA sources go to the same directory
OBJECTS_DIR = ./debug__
CUDA_OBJECTS_DIR = ./debug__

# This makes the .cu files appear in your project
#OTHER_FILES += ./kernel.cu

# CUDA settings <-- may change depending on your system
CUDA_SOURCES += ./kernel.cu
# Path to cuda SDK install (not referenced by the rules below)
CUDA_SDK = "/usr/local/cuda-9.0/"
# Path to cuda toolkit install
CUDA_DIR = "/usr/local/cuda-9.0/"

# DO NOT EDIT BEYOND THIS UNLESS YOU KNOW WHAT YOU ARE DOING....
# Informational only (not referenced by the rules below)
SYSTEM_NAME = ubuntu
# '32' or '64', depending on your system; passed to nvcc as --machine
SYSTEM_TYPE = 64
# Target GPU architecture passed to nvcc as -arch, e.g. 'sm_50' or 'compute_50'
CUDA_ARCH = sm_50
NVCC_OPTIONS = --use_fast_math

# include paths
INCLUDEPATH += $$CUDA_DIR/include
# library directories
QMAKE_LIBDIR += $$CUDA_DIR/lib64/
# Add the necessary libraries
CUDA_LIBS = -lcuda -lcudart
LIBS += $$CUDA_LIBS

# The following makes sure all path names (which often include spaces)
# are put between quotation marks
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')

# Configuration of the Cuda compiler.
# Debug and release builds share one rule; debug builds only add -D_DEBUG.
NVCC_DEBUG_FLAGS =
CONFIG(debug, debug|release): NVCC_DEBUG_FLAGS = -D_DEBUG

# nvcc is invoked with -c (compile only), so no libraries are passed here;
# linking is done by the regular linker using LIBS above.
cuda.input = CUDA_SOURCES
cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_DEBUG_FLAGS $$NVCC_OPTIONS \
                $$CUDA_INC --machine $$SYSTEM_TYPE \
                -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
cuda.dependency_type = TYPE_C
QMAKE_EXTRA_COMPILERS += cuda
2.5 运行结果
3 其他
有关 pro 文件的配置情况,可以参看《Qt 之 pro 配置详解》。