cuda实现中值滤波

lopiyi

于 2023-08-06 11:06:35 发布

阅读量279

点赞数

分类专栏： cuda 文章标签： opencv 计算机视觉 c++

本文链接：https://blog.csdn.net/qq_42112607/article/details/132128760

版权

cuda 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

窗口尺寸必须是大于1的奇数，窗口宽高可以不等。

#include <iostream>
#include<cuda.h>
#include <cuda_runtime_api.h>
#include <opencv2/opencv.hpp>
#include<device_launch_parameters.h>
using namespace std;

// CUDA error-checking macro.
// Wraps an expression yielding a cudaError_t and forwards it, together with
// the call site's file and line, to gpuAssert(). The do { } while (0) wrapper
// makes the macro behave as a single statement, so it is safe inside an
// unbraced if/else (a bare { ... } block followed by ';' would break the else).
#define CUDA_CHECK(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Reports a failed CUDA call.
//   code  - status returned by the CUDA runtime call
//   file  - source file of the call site (printed in the message)
//   line  - source line of the call site (printed in the message)
//   abort - when true (the default) the process exits with `code` as its
//           exit status; when false the error is only printed
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
	if (code != cudaSuccess)
	{
		std::cerr << "CUDA Error: " << cudaGetErrorString(code) << " " << file << " " << line << std::endl;
		if (abort) exit(code);
	}
}
// Edge length of the square thread block used for kernel launches
// (BLOCK_SIZE x BLOCK_SIZE threads per block).
#define BLOCK_SIZE 16


// CUDA kernel: per-channel median filter on 8-bit, channel-interleaved image data.
//
// Launch layout: 2D grid of 2D blocks, one thread per output pixel
// (x -> column, y -> row). Threads that fall outside the image return
// immediately. No shared memory is used.
//
// Parameters:
//   input    - source pixels, indexed as (row * width + col) * channels + k
//   output   - destination pixels, same indexing as input; should be a
//              different buffer than input, because other threads still read
//              neighbouring input pixels while results are being written
//   width    - image width in pixels
//   height   - image height in pixels
//   channels - number of interleaved channels per pixel
//   kernelW  - window width; the window spans kernelW / 2 pixels to each side
//   kernelH  - window height; the window spans kernelH / 2 pixels up and down
//
// The effective window is (2 * (kernelH / 2) + 1) x (2 * (kernelW / 2) + 1)
// samples; non-positive sizes degrade to a 1x1 window (output == input).
// Pixels outside the image are taken from the nearest edge pixel.
//
// The median is found with a 256-bin histogram rather than by sorting a
// fixed-size scratch array, so the window size has no built-in upper limit.
__global__ void medianFilter(const unsigned char* input, unsigned char* output, const int width, const int height, const int channels,
							 const int kernelW, const int kernelH)
{
	// Pixel handled by this thread
	const int col = blockIdx.x * blockDim.x + threadIdx.x;
	const int row = blockIdx.y * blockDim.y + threadIdx.y;

	// The grid is rounded up to whole blocks, so some threads lie outside the image
	if (col >= width || row >= height)
		return;

	// Half-extent of the window in each direction (clamped so that a
	// negative size cannot produce an empty window)
	const int paddingW = max(kernelW / 2, 0);
	const int paddingH = max(kernelH / 2, 0);

	// Number of samples in the window and the rank of the median among them
	// (equivalent to element count / 2 of the sorted window)
	const unsigned int count = (unsigned int)(2 * paddingH + 1) * (unsigned int)(2 * paddingW + 1);
	const unsigned int medianRank = count / 2;

	// Offset of this pixel's first channel; computed in size_t so large
	// images do not overflow 32-bit arithmetic
	const size_t index = ((size_t)row * width + col) * channels;

	// Per-thread histogram of the 8-bit sample values in the window
	unsigned int histogram[256];

	for (int k = 0; k < channels; k++) {
		for (int v = 0; v < 256; v++) {
			histogram[v] = 0;
		}

		// Accumulate the window samples for channel k
		for (int i = -paddingH; i <= paddingH; i++) {
			// Border handling: replicate the nearest edge row
			const int curRow = min(max(row + i, 0), height - 1);
			for (int j = -paddingW; j <= paddingW; j++) {
				// Border handling: replicate the nearest edge column
				const int curCol = min(max(col + j, 0), width - 1);
				histogram[input[((size_t)curRow * width + curCol) * channels + k]]++;
			}
		}

		// Walk the histogram until more than medianRank samples have been
		// seen; that bin is the value at sorted position medianRank
		unsigned int seen = 0;
		int median = 0;
		for (int v = 0; v < 256; v++) {
			seen += histogram[v];
			if (seen > medianRank) {
				median = v;
				break;
			}
		}

		// Store the median for this channel
		output[index + k] = (unsigned char)median;
	}
}
// Demo driver: loads a colour image from a fixed path, runs a 7x7 median
// filter on it with the medianFilter kernel, and shows the input and the
// filtered result in OpenCV windows until a key is pressed.
// Any failing CUDA call (other than the optional printf-buffer resize)
// prints a message and terminates the process via CUDA_CHECK.
void test3(void)
{
	// Enlarge the device-side printf buffer for the debug printf calls in the
	// kernel (currently commented out). This is optional, so a failure is
	// reported but does not abort.
	size_t sz = (size_t)1048576 * 100;
	gpuAssert(cudaDeviceSetLimit(cudaLimitPrintfFifoSize, sz), __FILE__, __LINE__, false);

	// Load the input image
	cv::Mat image = cv::imread("F:\\pic_data\\CBSD68\\3096.png", cv::IMREAD_COLOR);

	// Bail out if the image could not be loaded
	if (image.empty()) {
		std::cout << "Unable to read image" << std::endl;
		return ;
	}

	// The pixel data is transferred as one flat buffer, so the rows must be
	// stored back to back; make a packed copy if they are not
	if (!image.isContinuous()) {
		image = image.clone();
	}

	// Image geometry
	int width = image.cols;
	int height = image.rows;
	int channels = image.channels();

	// Size of the pixel data in bytes, computed in size_t to avoid int overflow
	size_t imageSize = (size_t)width * height * channels;

	// Allocate the device buffers and upload the input straight from the Mat
	unsigned char* deviceInput = nullptr;
	unsigned char* deviceOutput = nullptr;
	CUDA_CHECK(cudaMalloc((void**)&deviceInput, imageSize));
	CUDA_CHECK(cudaMemcpy(deviceInput, image.data, imageSize, cudaMemcpyHostToDevice));
	CUDA_CHECK(cudaMalloc((void**)&deviceOutput, imageSize));

	// One thread per pixel; the grid is rounded up to cover the whole image
	dim3 blockSize(BLOCK_SIZE, BLOCK_SIZE);
	dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y);
	cout << gridSize.x << "\t" << gridSize.y << endl;

	// Filter window size
	int kernelW = 7;
	int kernelH = 7;

	// Run the median filter on the GPU
	medianFilter << <gridSize, blockSize >> > (deviceInput, deviceOutput, width, height, channels, kernelW, kernelH);
	// A launch does not return a status: pick up launch-configuration errors
	// here and execution errors at the synchronisation point
	CUDA_CHECK(cudaGetLastError());
	CUDA_CHECK(cudaDeviceSynchronize());

	// Download the result directly into a freshly allocated output image
	cv::Mat output(height, width, image.type());
	CUDA_CHECK(cudaMemcpy(output.data, deviceOutput, imageSize, cudaMemcpyDeviceToHost));

	// The device buffers are no longer needed once the result is on the host
	CUDA_CHECK(cudaFree(deviceInput));
	CUDA_CHECK(cudaFree(deviceOutput));

	// Show the input and output images
	cv::imshow("Input Image", image);
	cv::imshow("Output Image", output);
	cv::waitKey(0);
	cv::destroyAllWindows();
}