第一步:新建MFC工程
第二步:建一个CUDA文件夹用于管理所有和CUDA有关的文件(.cu和.cuh)
第三步:建立Host.cu和Devect.cu文件
在CUDA文件夹上右键->Add->New Item
第四步:在Devect.cu文件中添加代码(extern "C"中的“C”必须大写,写成小写会编译报错,因为链接规范字符串区分大小写)
第五步:在Host.cu文件中添加代码(extern "C"中的“C”必须大写,写成小写会编译报错)
#include <cstdio>
#include <cuda_runtime.h>
// Forward declaration of the kernel launched below. Its definition is not in
// this file (the tutorial places it in Devect.cu); it is presumably an
// element-wise C[i] = A[i] + B[i] over numElements entries -- confirm there.
extern "C" __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements);

// Reports a failed CUDA runtime call on stderr and returns whether it succeeded.
static bool cudaCallSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA_Run_Function: %s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Host-side wrapper: copies the host arrays ha and hb to device 0, launches
// vectorAdd over nElementCount elements, and copies the result back into hc.
//
// Parameters:
//   ha, hb         - host input arrays, each holding nElementCount floats.
//   hc             - host output array with room for nElementCount floats.
//   nElementCount  - number of elements to process.
//
// The function returns nothing (interface kept for existing callers). If any
// argument is null, nElementCount is not positive, or any CUDA call fails,
// the failure is logged to stderr, device memory is released, and hc is left
// unmodified.
extern "C" void CUDA_Run_Function(float *ha, float *hb, float *hc, int nElementCount)
{
    if (ha == NULL || hb == NULL || hc == NULL || nElementCount <= 0)
    {
        fprintf(stderr, "CUDA_Run_Function: invalid arguments\n");
        return;
    }

    // Widen before multiplying so the byte count cannot overflow int.
    const size_t size = static_cast<size_t>(nElementCount) * sizeof(float);

    float *da = NULL;
    float *db = NULL;
    float *dc = NULL;

    // Each step runs only if all previous ones succeeded (short-circuit &&).
    bool ok = cudaCallSucceeded(cudaSetDevice(0), "cudaSetDevice")
           && cudaCallSucceeded(cudaMalloc((void **)&da, size), "cudaMalloc(da)")
           && cudaCallSucceeded(cudaMalloc((void **)&db, size), "cudaMalloc(db)")
           && cudaCallSucceeded(cudaMalloc((void **)&dc, size), "cudaMalloc(dc)")
           && cudaCallSucceeded(cudaMemcpy(da, ha, size, cudaMemcpyHostToDevice), "cudaMemcpy(ha -> da)")
           && cudaCallSucceeded(cudaMemcpy(db, hb, size, cudaMemcpyHostToDevice), "cudaMemcpy(hb -> db)");

    if (ok)
    {
        const int threadsPerBlock = 256;
        // Ceil-division written so it cannot overflow for large positive counts.
        const int blocksPerGrid = (nElementCount - 1) / threadsPerBlock + 1;

        vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(da, db, dc, nElementCount);

        // cudaGetLastError catches launch-configuration errors; the
        // synchronize call surfaces faults raised while the kernel ran.
        ok = cudaCallSucceeded(cudaGetLastError(), "vectorAdd launch")
          && cudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")
          && cudaCallSucceeded(cudaMemcpy(hc, dc, size, cudaMemcpyDeviceToHost), "cudaMemcpy(dc -> hc)");
    }

    // Free device global memory. cudaFree(NULL) is a no-op, so this is safe
    // regardless of how far the sequence above got.
    cudaCallSucceeded(cudaFree(da), "cudaFree(da)");
    cudaCallSucceeded(cudaFree(db), "cudaFree(db)");
    cudaCallSucceeded(cudaFree(dc), "cudaFree(dc)");
}
第六步,在对话框上面添加button按键Id为IDC_MAINDLG_RUNCUDA并添加相应的响应消息。
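第七步,在CUDATestProjectDlg.cpp中添加CUDA_Run_Function的声明:extern "C" void CUDA_Run_Function(float *ha, float *hb, float *hc, int nElementCount);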
第八步:button响应消息函数添加代码
int nElementCount = 10;
float *ha = NULL;
float *hb = NULL;
float *hc = NULL;
ha = (float *)new float[nElementCount];
hb = (float *)new float[nElementCount];
hc = (float *)new float[nElementCount];
for (int i = 0; i < nElementCount;++i)
{
ha[i] = rand()/(float)255;
hb[i] = rand()/(float)255;
}
CUDA_Run_Function(ha,hb,hc,nElementCount);
for (int i = 0; i < nElementCount;++i)
{
CStringstrMsgInfo;
strMsgInfo.Format("%f %f%f",ha[i],hb[i],hc[i]);
AfxMessageBox(strMsgInfo);
}
if (ha != NULL)
{
delete[] ha;
ha = NULL;
}
if (hb != NULL)
{
delete[] hb;
hb = NULL;
}
if (hc != NULL)
{
delete[] hc;
hc = NULL;
}
第九步,对工程进行设置
CUDATestProject右键->Custom Build Rules
CUDATestProject右键->properties
$(CUDA_PATH)/include;./;../../common/inc
$(CUDA_PATH)/lib/$(PlatformName);../../common/lib/$(PlatformName);
cuda.lib cudart.lib
Host.cu\Devect.cu右键->properties
本工程是64位工程,需要把上面的x86改为x64
第十步,运行程序
不知道为什么最后一位计算出来不对,可能是精度不同造成的吧。