本文在以下文章的基础上,实现了将多张图像传递给核函数的方法:《给cuda核函数传递二维数组的一种方法》(lingerlanlan的博客,CSDN博客)
CUDA+VS+OpenCV
可将每张图像在显存中的指针放进一个指针数组中,只需向核函数传递这一个二级指针,核函数即可访问多张图像;核函数处理结束后,再将处理后的图像传回CPU。
// Zeroes alternating rows / columns of two complex images, in place.
//
// Parameters
//   src : table of at least two pointers, dereferenced on the device.
//         src[0] and src[1] must each address w*h cufftComplex elements laid
//         out row-major with no padding (element (i, j) lives at i*w + j).
//   w   : image width  (elements per row).
//   h   : image height (number of rows).
//
// Effect
//   src[0] : every element on an even row    (i % 2 == 0) is set to 0 + 0i.
//   src[1] : every element on an even column (j % 2 == 0) is set to 0 + 0i.
//
// Launch
//   1D grid of 1D blocks. Only the x dimension of the grid and block is used.
//   Each thread starts at its flat global index and advances by the total
//   number of launched threads, so any grid/block size covers all w*h
//   elements (the block size no longer has to equal the image width).
__global__ void demo_dev(cufftComplex ** src, int w, int h)
{
    // Nothing to process; also keeps the division/modulo by `width` below safe.
    if (w <= 0 || h <= 0)
        return;

    const size_t width  = (size_t)w;
    const size_t total  = width * (size_t)h;
    // 64-bit arithmetic so large images cannot overflow the flat index.
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         idx < total;
         idx += stride)
    {
        const size_t i = idx / width;   // row
        const size_t j = idx % width;   // column (always < w by construction)

        if (i % 2 == 0)
        {
            src[0][idx].x = 0.0f;
            src[0][idx].y = 0.0f;
        }
        if (j % 2 == 0)
        {
            src[1][idx].x = 0.0f;
            src[1][idx].y = 0.0f;
        }
    }
}
// Prints a failed CUDA runtime status; returns true only for cudaSuccess.
static bool demo13_check(cudaError_t err)
{
    if (err == cudaSuccess)
        return true;
    printf("CUDA Error: %s\n", cudaGetErrorString(err));
    return false;
}

// Test: passing an array of images to a kernel through a pointer-to-pointer.
//
// Steps:
//   1. Load "lena.jpg" `num` times as grayscale, turn each copy into a
//      2-channel float image (real = pixel value, imag = 0) and upload it to
//      its own device buffer.
//   2. Upload the table of device pointers itself to the device (dev_2d).
//   3. Run demo_dev on the table.
//   4. Copy both processed images back into host Mats (dst0, dst1).
//   5. Release every device and host allocation.
//
// Any failure (image not loadable, CUDA error) is reported on stdout and the
// remaining steps are skipped; cleanup always runs.
void demo13()
{
    const int num = 2;
    int w = 0, h = 0;

    // Host-side table of device pointers. Value-initialised to NULL so the
    // cleanup loop may call cudaFree on entries that were never allocated.
    cufftComplex** host_2d = new cufftComplex*[num]();
    cufftComplex** dev_2d = NULL;
    bool ok = true;

    for (int k = 0; k < num && ok; k++)
    {
        Mat temp = imread("lena.jpg", 0);
        if (temp.empty())
        {
            // imread signals failure with an empty Mat, not an exception.
            printf("failed to load lena.jpg\n");
            ok = false;
            break;
        }
        // Every iteration loads the same file, so w/h are identical for all
        // images; the kernel relies on a single shared size.
        w = temp.cols;
        h = temp.rows;
        temp.convertTo(temp, CV_32FC1);
        Mat planes[] = { Mat_<float>(temp), Mat::zeros(h, w, CV_32FC1) };
        merge(planes, 2, temp);   // interleaved (re, im) floats, same layout as cufftComplex

        const size_t bytes = sizeof(cufftComplex) * (size_t)w * (size_t)h;
        // The device buffer holds w*h cufftComplex values (interleaved float pairs).
        ok = demo13_check(cudaMalloc((void**)&host_2d[k], bytes));
        if (ok)
            ok = demo13_check(cudaMemcpy(host_2d[k], temp.data, bytes, cudaMemcpyHostToDevice));
    }

    if (ok)
        ok = demo13_check(cudaMalloc((void**)&dev_2d, sizeof(cufftComplex*) * num));
    if (ok)
        ok = demo13_check(cudaMemcpy(dev_2d, host_2d, sizeof(cufftComplex*) * num, cudaMemcpyHostToDevice));

    if (ok)
    {
        // One thread per pixel with a fixed block size. Using the image width
        // as the block size would exceed the per-block thread limit for wide
        // images and make the launch fail.
        const int threads = 256;
        const int blocks = (int)(((size_t)w * (size_t)h + threads - 1) / threads);
        demo_dev << <blocks, threads >> > (dev_2d, w, h);

        // Launch-configuration errors surface through cudaGetLastError();
        // faults during execution surface at the synchronisation point.
        ok = demo13_check(cudaGetLastError());
        if (ok)
            ok = demo13_check(cudaDeviceSynchronize());
        if (ok)
            printf("no error!\n");
    }

    if (ok)
    {
        // host_2d already holds the device pointers, so the pointer table does
        // not need to be read back from dev_2d before fetching the images.
        const size_t bytes = sizeof(cufftComplex) * (size_t)w * (size_t)h;
        Mat dst0 = Mat::zeros(h, w, CV_32FC2);
        Mat dst1 = Mat::zeros(h, w, CV_32FC2);
        ok = demo13_check(cudaMemcpy(dst0.data, host_2d[0], bytes, cudaMemcpyDeviceToHost));
        if (ok)
            ok = demo13_check(cudaMemcpy(dst1.data, host_2d[1], bytes, cudaMemcpyDeviceToHost));
        // dst0 / dst1 now contain the processed images (inspect or display here).
    }

    // Cleanup runs on every path; cudaFree(NULL) is a harmless no-op.
    for (int k = 0; k < num; k++)
        cudaFree(host_2d[k]);
    cudaFree(dev_2d);
    delete[] host_2d;

    cout << endl;
}