CUDA C++ 对核函数同时传递多张图像

最新推荐文章于 2024-04-27 16:49:54 发布

哦嚯、

最新推荐文章于 2024-04-27 16:49:54 发布

阅读量976

点赞数 1

分类专栏： CUDA OpenCV 文章标签： c++ opencv 图像处理

本文链接：https://blog.csdn.net/weixin_43865692/article/details/126249647

版权

OpenCV 同时被 2 个专栏收录

7 篇文章 1 订阅

订阅专栏

CUDA

1 篇文章 0 订阅

订阅专栏

本文在以下文章的基础上，实现了将多张图像同时传递给核函数的方法：《给cuda核函数传递二维数组的一种方法》（lingerlanlan 的博客，CSDN）

CUDA+VS+OpenCV

可先将每张图像拷贝到各自的设备缓冲区，再把这些设备指针放进一个指针数组，并将该指针数组也拷贝到设备端；这样只需向核函数传递一个二级指针，核函数即可访问多张图像。核函数处理结束后，再将处理后的图像逐张传回 CPU。

// Demo kernel operating on two images reached through one pointer table.
//
// For every pixel with flat index idx (row i = idx / w, column j = idx % w):
//   - image 0 (src[0]) is zeroed on even rows,
//   - image 1 (src[1]) is zeroed on even columns.
//
// Parameters:
//   src  pointer table with at least two entries; each entry addresses a
//        buffer of w*h cufftComplex elements indexed as row * w + column.
//        Both the table and the buffers are dereferenced here, so they must
//        be device-accessible.
//   w,h  image width and height in pixels.
//
// Launch layout: 1-D grid of 1-D blocks of any size. The grid-stride loop
// lets a launch with fewer than w*h threads still cover every pixel, while a
// launch with more threads leaves the surplus threads idle.
__global__ void demo_dev(cufftComplex ** src, int w, int h)
{
	// Nothing to do for an empty image; also avoids dividing by w == 0 below.
	if (w <= 0 || h <= 0)
	{
		return;
	}

	// 64-bit arithmetic so blockIdx.x * blockDim.x and w * h cannot wrap.
	const long long total = (long long)w * h;
	const long long stride = (long long)gridDim.x * blockDim.x;

	for (long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
		idx < total;
		idx += stride)
	{
		const int i = (int)(idx / w);	// row
		const int j = (int)(idx % w);	// column, always < w by construction

		if (i % 2 == 0)
		{
			src[0][idx].x = 0.0f;
			src[0][idx].y = 0.0f;
		}

		if (j % 2 == 0)
		{
			src[1][idx].x = 0.0f;
			src[1][idx].y = 0.0f;
		}
	}
}

// Reports a failed CUDA runtime call on stdout; returns true when err is cudaSuccess.
static bool demo13_ok(cudaError_t err, const char* what)
{
	if (err != cudaSuccess)
	{
		printf("CUDA Error: %s (%s)\n", cudaGetErrorString(err), what);
		return false;
	}
	return true;
}

// Frees every device image buffer listed in host_2d, the device pointer table, and host_2d itself.
static void demo13_release(cufftComplex** host_2d, int num, cufftComplex** dev_2d)
{
	for (int k = 0; k < num; k++)
	{
		cudaFree(host_2d[k]);	// cudaFree(NULL) is a no-op, so unused slots are fine
	}
	cudaFree(dev_2d);
	delete[] host_2d;
}

// Test: pass an array of images to a kernel through a single pointer argument.
//
// Steps:
//   1. Load "lena.jpg" as grayscale `num` times, convert each copy to a
//      2-channel float image (real plane = pixels, imaginary plane = 0) and
//      upload it to its own device buffer.
//   2. Collect the device buffer addresses in a host-side table (host_2d) and
//      upload that table to the device (dev_2d).
//   3. Launch demo_dev with dev_2d so the kernel can reach every image.
//   4. Copy the two processed images back into dst0 / dst1.
//
// Every CUDA call is checked; on any failure the error is printed, the
// remaining steps are skipped, and all host/device allocations are released.
void demo13()
{
	// demo_dev reads src[0] and src[1], so at least two images are required.
	const int num = 2;

	int w = 0;
	int h = 0;
	bool ok = true;

	// Value-initialised to NULL so cleanup can run safely after a partial setup.
	cufftComplex** host_2d = new cufftComplex*[num]();
	cufftComplex** dev_2d = NULL;

	for (int k = 0; k < num; k++)
	{
		Mat temp = imread("lena.jpg", 0);
		if (temp.empty())
		{
			printf("failed to load lena.jpg\n");
			ok = false;
			break;
		}

		w = temp.cols;
		h = temp.rows;

		temp.convertTo(temp, CV_32FC1);

		// Interleave (real, imag) as a 2-channel float image, matching the
		// two-float layout of cufftComplex.
		Mat planes[] = { Mat_<float>(temp),Mat::zeros(h,w,CV_32FC1) };
		merge(planes, 2, temp);

		// A raw copy from temp.data requires rows without padding.
		if (!temp.isContinuous())
		{
			temp = temp.clone();
		}

		const size_t bytes = sizeof(cufftComplex) * (size_t)w * (size_t)h;

		// dev_1d addresses a device buffer of w*h cufftComplex elements.
		cufftComplex* dev_1d = NULL;
		if (!demo13_ok(cudaMalloc((void**)&dev_1d, bytes), "cudaMalloc image"))
		{
			ok = false;
			break;
		}

		// Record the buffer before copying so it is freed even if the copy fails.
		host_2d[k] = dev_1d;

		if (!demo13_ok(cudaMemcpy(dev_1d, temp.data, bytes, cudaMemcpyHostToDevice), "cudaMemcpy image H2D"))
		{
			ok = false;
			break;
		}
	}

	// Upload the table of device pointers so the kernel can dereference it.
	if (ok)
	{
		ok = demo13_ok(cudaMalloc((void**)&dev_2d, sizeof(cufftComplex*)*num), "cudaMalloc pointer table");
	}
	if (ok)
	{
		ok = demo13_ok(cudaMemcpy(dev_2d, host_2d, sizeof(cufftComplex*)*num, cudaMemcpyHostToDevice), "cudaMemcpy pointer table H2D");
	}

	if (ok)
	{
		// Fixed block size with a ceil-div grid: using the image width as the
		// block size would fail for images wider than the per-block thread limit.
		const int threads = 256;
		const long long total = (long long)w * h;
		const unsigned int blocks = (unsigned int)((total + threads - 1) / threads);

		demo_dev<<<blocks, threads>>>(dev_2d, w, h);

		// Launch-configuration errors show up immediately; faults raised while
		// the kernel runs show up at the synchronisation point.
		ok = demo13_ok(cudaGetLastError(), "demo_dev launch");
		if (ok)
		{
			ok = demo13_ok(cudaDeviceSynchronize(), "demo_dev execution");
		}
	}

	if (ok)
	{
		printf("no error!\n");

		Mat dst0 = Mat::zeros(h, w, CV_32FC2);
		Mat dst1 = Mat::zeros(h, w, CV_32FC2);

		const size_t bytes = sizeof(cufftComplex) * (size_t)w * (size_t)h;

		// host_2d already holds the device buffer addresses, so the pointer
		// table does not need to be copied back from dev_2d.
		if (demo13_ok(cudaMemcpy(dst0.data, host_2d[0], bytes, cudaMemcpyDeviceToHost), "cudaMemcpy image 0 D2H"))
		{
			demo13_ok(cudaMemcpy(dst1.data, host_2d[1], bytes, cudaMemcpyDeviceToHost), "cudaMemcpy image 1 D2H");
		}
	}

	demo13_release(host_2d, num, dev_2d);

	cout << endl;
}