1. 创建AddVectors.h文件
// AddVectors.h - declaration of the host-side entry point for GPU vector
// addition. The definition lives in the CUDA translation unit AddVectors.cu.
#ifndef __ADDVECTORS_H__
#define __ADDVECTORS_H__  // must match the #ifndef name or the guard never triggers

// Host-callable wrapper around the vector-addition kernel.
//   A, B : host input buffers holding `size` floats each; the definition
//          copies them to the device before launching the kernel.
//   C    : host output buffer with room for `size` floats; the definition
//          copies the device result back into it.
//   size : number of elements in each buffer.
extern void addVectors(float* A, float* B, float* C, int size);

#endif // __ADDVECTORS_H__
2. 创建AddVectors.cu文件，其中包含CUDA核函数及其主机端封装函数
#include "AddVectors.h"
#include "D:Matlab\extern\include\mex.h"

/**
 * Element-wise vector addition kernel: C[i] = A[i] + B[i].
 *
 * Each thread handles the single element whose index equals its flat 1-D
 * global thread index (threadIdx.x + blockIdx.x * blockDim.x). Threads whose
 * index is at or beyond `size` return without touching memory, so the launch
 * may contain more threads than there are elements.
 *
 * @param A     first input array, `size` floats (the caller in this file
 *              passes a cudaMalloc'ed device buffer)
 * @param B     second input array, `size` floats (device buffer, as above)
 * @param C     output array, `size` floats (device buffer, as above)
 * @param size  number of elements to process
 */
__global__ void addVectorsMask(float* A, float* B, float* C, int size)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;

    // Tail guard. The earlier `i != size` test was inverted: it made every
    // in-range thread return and let only the thread with i == size through,
    // which then accessed one element past the end of all three arrays.
    if (i >= size)
        return;

    C[i] = A[i] + B[i];
}

// NOTE(review): the definition below is cut off in the reviewed text and is left byte-for-byte as found.
voidaddVectors(float*A,float*B,float*C,intsize){float*devPtrA=0,*devPtrB=0,*devPtrC=0;cudaMalloc(&devPtrA,sizeof(float)*size);cudaMalloc(&devPtrB,sizeof(float)*size);cudaMalloc(&devPtrC,sizeof(float)*size);cudaMemcpy(devPtrA,A,sizeof(float)*size,cudaMemcpyHostToDevice);cudaMemcpy(devPtrB,B,sizeof(float)*size,cudaMemcpyHostToDevice);addVectorsMask<<>>(devPtrA,devPtrB,devPtrC,size);cudaMemcpy(C,devPtrC,sizeof(float)*size,cudaMemcpyDeviceToHost);cudaFree(devPtrA);cudaFree(devPtrB);cud