本文使用的环境是 VS2017、Qt 5.12.3 以及 Qt Creator 4.9.0,CUDA 版本为 10.1。
1.首先建立VS工程,选择动态链接库
2.添加cu文件,点击项目名称,选择添加
然后选择添加cu文件
3. 写入代码,很简单的向量加法
kernel.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cudadd.h"
// CUDA kernel: element-wise vector addition, c[i] = a[i] + b[i] for 0 <= i < n.
//
// Launch layout: 1-D grid of 1-D blocks. Any configuration that provides at
// least n threads in total is valid; surplus threads in the last block fail the
// bounds check and exit without touching memory. No shared memory is used.
// c, a and b must be device pointers to at least n ints each.
__global__ void addKernel(int *c, const int *a, const int *b, int n)
{
    // Flat global index across all blocks.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}

// Adds two host vectors on the GPU: c[i] = a[i] + b[i] for 0 <= i < size.
//
// Parameters:
//   c    - host output array, at least `size` ints
//   a, b - host input arrays, at least `size` ints each
//   size - number of elements; 0 is a no-op, negative values are rejected
//
// Returns 0 on success, otherwise a code identifying the failed stage:
//   -1  invalid size (negative)
//    1  cudaSetDevice failed
//    2  device allocation for c failed
//    3  device allocation for a failed
//    4  device allocation for b failed
//    5  host-to-device copy of a failed
//    6  host-to-device copy of b failed
//    7  kernel launch or kernel execution failed
//    8  device-to-host copy of c failed
//    9  cudaDeviceReset failed (c already holds the result in this case)
CUDADD_API int vectorAdd(int c[], int a[], int b[], int size)
{
    // All locals are declared before the first goto so that no jump to the
    // Error label crosses an initialization.
    const int threadsPerBlock = 256;
    int result = -1;
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    size_t bytes = 0;
    int blocks = 0;
    cudaError_t cudaStatus;

    // A negative size would wrap to a huge size_t below; reject it up front.
    if (size < 0) {
        return -1;
    }
    // Nothing to compute; also avoids an invalid zero-thread kernel launch.
    if (size == 0) {
        return 0;
    }

    bytes = static_cast<size_t>(size) * sizeof(int);
    // Ceiling division written so that it cannot overflow int for large sizes.
    blocks = (size - 1) / threadsPerBlock + 1;

    // Select the GPU to run on.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        result = 1;
        goto Error;
    }

    // Allocate device buffers for the output and the two inputs.
    cudaStatus = cudaMalloc((void**)&dev_c, bytes);
    if (cudaStatus != cudaSuccess) {
        result = 2;
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_a, bytes);
    if (cudaStatus != cudaSuccess) {
        result = 3;
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_b, bytes);
    if (cudaStatus != cudaSuccess) {
        result = 4;
        goto Error;
    }

    // Copy the input vectors from host memory to device memory.
    cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        result = 5;
        goto Error;
    }
    cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        result = 6;
        goto Error;
    }

    // Launch enough blocks to cover every element; the kernel bounds-checks
    // the tail so size does not have to be a multiple of threadsPerBlock.
    addKernel<<<blocks, threadsPerBlock>>>(dev_c, dev_a, dev_b, size);

    // A kernel launch returns no status itself; launch-configuration errors
    // are only visible through cudaGetLastError().
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        result = 7;
        goto Error;
    }

    // Wait for the kernel to finish and pick up any error raised while it ran.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        result = 7;
        goto Error;
    }

    // Copy the result vector from device memory back to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        result = 8;
        goto Error;
    }
    result = 0;

Error:
    // Release device buffers; cudaFree on a null pointer is a no-op, so this
    // is safe regardless of how far the function got.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    // Reset the device only after the buffers have been freed, and only on the
    // success path (as before). NOTE(review): cudaDeviceReset destroys all
    // CUDA resources of the calling process on this device; kept for
    // compatibility with the existing return-code contract, but reconsider it
    // if the host application uses CUDA elsewhere.
    if (result == 0 && cudaDeviceReset() != cudaSuccess) {
        result = 9;
    }
    return result;
}
cudadd.h
#pragma once

// Public interface of the cudadd dynamic library.
//
// CUDADD_API marks symbols as exported while the library itself is being built
// (CUDADD_EXPORTS defined) and as imported when the header is consumed by a
// client. On non-Windows toolchains, which have no __declspec, it expands to
// nothing.
#if defined(_WIN32)
#ifdef CUDADD_EXPORTS
#define CUDADD_API __declspec(dllexport)
#else
#define CUDADD_API __declspec(dllimport)
#endif
#else
#define CUDADD_API
#endif

// C linkage keeps the exported name unmangled; the guard lets plain C
// translation units include this header as well.
#ifdef __cplusplus
extern "C" {
#endif

// Computes c[i] = a[i] + b[i] for 0 <= i < size on the GPU.
// c, a and b are host arrays holding at least `size` ints each.
// Returns 0 on success and a non-zero code identifying the failed stage
// otherwise.
CUDADD_API int vectorAdd(int c[], int a[], int b[], int size);

#ifdef __cplusplus
}
#endif
4. 修改项目的生成自定义:右键点击项目,选择“生成依赖项”→“生成自定义”,勾选 CUDA 10.1
5. 修改cu文件的项类型:右键点击cu文件选择“属性”,将“项类型”设为“CUDA C/C++”;
然后在项目属性的“链接器”→“输入”→“附加依赖项”中添加 cudart.lib
6. 生成
此时我们看到debug目录下多出了dll和lib文件
7. 打开QT Creator,新建一个项目
建好后右键点击项目,选择“添加库”
选择“外部库”
找到刚才生成的lib文件
8. 将头文件cudadd.h拷贝到Qt项目所在目录下,并添加进Qt工程
9.在QT项目中main.cpp写入测试代码
main.cpp
#include <QCoreApplication>
#include <iostream>
#include "cudadd.h"
using namespace std;
// Prints `count` integers from `values` on one line, separated by single
// spaces and terminated by a newline.
static void printRow(const int *values, int count)
{
    for (int i = 0; i < count; ++i) {
        if (i > 0) {
            std::cout << ' ';
        }
        std::cout << values[i];
    }
    std::cout << std::endl;
}

// Smoke test for the cudadd library: adds two 5-element vectors through
// vectorAdd and prints both inputs followed by the result, one per line.
// Returns 1 without entering the event loop if vectorAdd reports a failure;
// otherwise hands control to the Qt event loop.
int main(int argc, char *argv[])
{
    QCoreApplication e(argc, argv);

    const int kCount = 5;
    int a[kCount] = {1,2,3,4,5};
    int b[kCount] = {3,4,2,3,5};
    // Zero-initialised so the buffer never holds indeterminate values.
    int c[kCount] = {0};

    // vectorAdd returns 0 on success; anything else means c is not valid,
    // so report the code instead of printing meaningless numbers.
    const int status = vectorAdd(c, a, b, kCount);
    if (status != 0) {
        std::cerr << "vectorAdd failed with error code " << status << std::endl;
        return 1;
    }

    printRow(a, kCount);
    printRow(b, kCount);
    printRow(c, kCount);
    return e.exec();
}
10. 运行(运行前需确保生成的dll位于可执行文件所在目录或系统PATH中,否则程序无法启动),结果无误,大功告成