CUDA卷积操作—使用constant memory实现高斯滤波

高斯滤波就是使用高斯模板和图片进行卷积运算,高斯函数及模板如下图所示:


卷积前后的效果图如下:


constant memory的使用及CUDA编程的相关内容,在代码注释中有详细介绍。
GPU代码如下所示:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include "Windows.h"
#include <math.h>
#include<iostream>
using namespace std;

#define BLOCKDIM_X		16
#define BLOCKDIM_Y		16

#define GRIDDIM_X		256
#define GRIDDIM_Y		256
#define MASK_WIDTH		5

__constant__ int d_const_Gaussian[MASK_WIDTH*MASK_WIDTH]; //分配常数存储器

unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount);
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount);
static __global__ void kernel_GaussianFilt(int width,int height,int byteCount,unsigned char *d_src_imgbuf,unsigned char *d_guassian_imgbuf);

void main()
{
	//查看显卡配置
	struct cudaDeviceProp pror;
	cudaGetDeviceProperties(&pror,0);
	cout<<"maxThreadsPerBlock="<<pror.maxThreadsPerBlock<<endl;

	long start, end;
	long time = 0; 

	//CUDA计时函数
	start = GetTickCount();
	cudaEvent_t startt,stop; //CUDA计时机制
	cudaEventCreate(&startt);
	cudaEventCreate(&stop);
	cudaEventRecord(startt,0);

	unsigned char *h_src_imgbuf;  //图像指针
	int width, height, byteCount;
	char rootPath1[]="C:\\Users\\a404\\Desktop\\测试图片\\";
	char readPath[1024];
	int frame=1;
	for (int k=1;k<=frame;k++)
	{
		sprintf(readPath, "%s%d.bmp", rootPath1, k);
		h_src_imgbuf=readBmp(readPath, &width, &height, &byteCount);
	
		int size1=width*height *byteCount*sizeof(unsigned char);
		int size2=width*height *sizeof(unsigned char);
		
		//输出图像内存-host端	
		unsigned char *h_guassian_imgbuf=new unsigned char[width*height*byteCount];
		
		//分配显存空间
		unsigned char *d_src_imgbuf;
		unsigned char *d_guassian_imgbuf;

		cudaMalloc((void**)&d_src_imgbuf, size1);
		cudaMalloc((void**)&d_guassian_imgbuf, size1);

		//把数据从Host传到Device
		cudaMemcpy(d_src_imgbuf, h_src_imgbuf, size1, cudaMemcpyHostToDevice);

		//将高斯模板传入constant memory
		int Gaussian[25] = {1,4,7,4,1,
							4,16,26,16,4,
							7,26,41,26,7,
							4,16,26,16,4,
							1,4,7,4,1};//总和为273
		cudaMemcpyToSymbol(d_const_Gaussian, Gaussian, 25 * sizeof(int));

		int bx = ceil((double)width/BLOCKDIM_X); //网格和块的分配
		int by = ceil((double)height/BLOCKDIM_Y);

		if(bx > GRIDDIM_X) bx = GRIDDIM_X;
		if(by > GRIDDIM_Y) by = GRIDDIM_Y;

		dim3 grid(bx, by);//网格的结构
		dim3 block(BLOCKDIM_X, BLOCKDIM_Y);//块的结构

		//kernel--高斯滤波
		kernel_GaussianFilt<<<grid, block>>>(width,height,byteCount,d_src_imgbuf,d_guassian_imgbuf);
		cudaMemcpy(h_guassian_imgbuf, d_guassian_imgbuf,size1, cudaMemcpyDeviceToHost);//数据传回主机端

		char rootPath2[]="C:\\Users\\a404\\Desktop\\测试结果\\";
		char writePath[1024];
		sprintf(writePath, "%s%d.bmp", rootPath2, k);
		saveBmp(writePath, h_guassian_imgbuf, width, height, byteCount);
		
		//输出进度展示
		cout<<k<<"  "<<((float)k/frame)*100<<"%"<<endl;
	
		//释放内存
		cudaFree(d_src_imgbuf); 
		cudaFree(d_guassian_imgbuf);
		
		delete []h_src_imgbuf;
		delete []h_guassian_imgbuf;
	}
	end = GetTickCount();
	InterlockedExchangeAdd(&time, end - start);
	cout << "Total time GPU:";
	cout << time << endl;
	int x;
	cin>>x;
}

static __global__ void kernel_GaussianFilt(int width,int height,int byteCount,unsigned char *d_src_imgbuf,unsigned char *d_dst_imgbuf)
{
	const int tix = blockDim.x * blockIdx.x + threadIdx.x;
	const int tiy = blockDim.y * blockIdx.y + threadIdx.y;

	const int threadTotalX = blockDim.x * gridDim.x;
	const int threadTotalY = blockDim.y * gridDim.y;

	for(int ix = tix; ix < height; ix += threadTotalX)
		for(int iy = tiy; iy < width; iy += threadTotalY)
		{ 
			for(int k=0;k<byteCount;k++)
			{
				int sum=0;//临时值
				int tempPixelValue=0;
				for (int m=-2;m<=2;m++)
				{
					for (int n=-2;n<=2;n++)
					{
						//边界处理,幽灵元素赋值为零
						if (ix+m<0||iy+n<0||ix+m>=height||iy+n>=width)						
							tempPixelValue=0;						 
						else						
							tempPixelValue=*(d_src_imgbuf+(ix+m)*width*byteCount+(iy+n)*byteCount+k);							
						sum+=tempPixelValue*d_const_Gaussian[(m+2)*5+n+2];
					}
				}

				if (sum/273<0) 
					*(d_dst_imgbuf+(ix)*width*byteCount+(iy)*byteCount+k)=0;
				else if(sum/273>255)  
					*(d_dst_imgbuf+(ix)*width*byteCount+(iy)*byteCount+k)=255;
				else  
					*(d_dst_imgbuf+(ix)*width*byteCount+(iy)*byteCount+k)=sum/273;		
			}
	    }
}

unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount)
{
	//打开文件
	FILE *fp=fopen(bmpName,"rb");
	if(fp==0) return 0;
	//跳过文件头
	fseek(fp, sizeof(BITMAPFILEHEADER),0);

	//读入信息头
	int w, h, b;
	BITMAPINFOHEADER head;
	fread(&head, sizeof(BITMAPINFOHEADER), 1,fp); 
	w = head.biWidth;
	h = head.biHeight;
	b = head.biBitCount/8;
	int lineByte=(w * b+3)/4*4; //每行的字节数为4的倍数

	//跳过颜色表 (颜色表的大小为1024)(彩色图像并没有颜色表,不需要这一步)
	if(b==1)
		fseek(fp, 1024,1);

	//图像数据
	unsigned char *imgBuf=new unsigned char[w * h * b];
	for(int i=0;i<h;i++)
	{
		fread(imgBuf+i*w*b,w*b, 1,fp);
		fseek(fp, lineByte-w*b, 1);
	}
	fclose(fp);

	*width=w,  *height=h, *byteCount=b;

	return imgBuf;
}


bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount)
{
	if(!imgBuf)
		return 0;

	//灰度图像颜色表空间1024,彩色图像没有颜色表
	int palettesize=0;
	if(byteCount==1) palettesize=1024;

	//一行象素字节数为4的倍数
	int lineByte=(width * byteCount+3)/4*4;

	FILE *fp=fopen(bmpName,"wb");
	if(fp==0) return 0;

	//填写文件头
	BITMAPFILEHEADER fileHead;
	fileHead.bfType = 0x4D42;
	fileHead.bfSize= 
		sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+ palettesize + lineByte*height;
	fileHead.bfReserved1 = 0;
	fileHead.bfReserved2 = 0;
	fileHead.bfOffBits=54+palettesize;
	fwrite(&fileHead, sizeof(BITMAPFILEHEADER),1, fp);

	// 填写信息头
	BITMAPINFOHEADER head; 
	head.biBitCount=byteCount*8;
	head.biClrImportant=0;
	head.biClrUsed=0;
	head.biCompression=0;
	head.biHeight=height;
	head.biPlanes=1;
	head.biSize=40;
	head.biSizeImage=lineByte*height;
	head.biWidth=width;
	head.biXPelsPerMeter=0;
	head.biYPelsPerMeter=0;
	fwrite(&head, sizeof(BITMAPINFOHEADER),1, fp);

	//颜色表拷贝  
	if(palettesize==1024)
	{
		unsigned char palette[1024];
		for(int i=0;i<256;i++)
		{
			*(palette+i*4+0)=i;
			*(palette+i*4+1)=i;
			*(palette+i*4+2)=i;
			*(palette+i*4+3)=0;     
		}
		fwrite(palette, 1024,1, fp);
	}

	//准备数据并写文件
	unsigned char *buf=new unsigned char[height*lineByte];
	for(int i=0;i<height;i++)
	{
		for(int j=0;j<width*byteCount; j++)
			*(buf+i*lineByte+j)=*(imgBuf+i*width*byteCount+j);
	}
	fwrite(buf, height*lineByte, 1, fp);

	delete []buf;

	fclose(fp);

	return 1;
}
附录:高斯滤波CPU代码
#include "stdio.h"
#include "Windows.h"
#include <iostream>
using namespace std;

unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount); //读入图像
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount); //保存图像
void GaussianFilt(int width,int height,int byteCount,int Gaussian[][5],unsigned char *gray_imgbuf,unsigned char *guassian_imgbuf); //高斯滤波

void main()
{
	//计时函数
	long start, end;
	long time = 0; 
	start = GetTickCount();

	unsigned char *src_imgbuf; //图像指针
	int width, height, byteCount;
	char rootPath1[]="C:\\Users\\a404\\Desktop\\测试图片\\";
	char readPath[1024];
	int frame=300;  //读入图像副数
	for (int i=1;i<=frame;i++)
	{
		sprintf(readPath, "%s%d.bmp", rootPath1, i);
		src_imgbuf=readBmp(readPath, &width, &height, &byteCount);
		//printf("宽=%d,高=%d,字节=%d\n",width, height, byteCount);

		//读入高斯模糊模板
		int Gaussian_mask[5][5]={{1,4,7,4,1},{4,16,26,16,4},{7,26,41,26,7},{4,16,26,16,4},{1,4,7,4,1}};//总和为273

		//输出图像内存分配	
		unsigned char *guassian_imgbuf=new unsigned char[width*height*byteCount];

		//对原图高斯模糊
		GaussianFilt(width,height,byteCount,Gaussian_mask,src_imgbuf,guassian_imgbuf);
	
		char rootPath2[]="C:\\Users\\a404\\Desktop\\";
		char writePath[1024];
		sprintf(writePath, "%s%d.bmp", rootPath2, i);
		
		saveBmp(writePath, guassian_imgbuf, width, height, byteCount);

		cout<<i<<"  "<<((float)i/frame)*100<<"%"<<endl;
		delete []src_imgbuf;
		delete []guassian_imgbuf;
	}
	end = GetTickCount();
	InterlockedExchangeAdd(&time, end - start);
	cout << "Total time CPU:";
	cout << time << endl;
	int x;
	cin>>x;
}

void GaussianFilt(int width,int height,int byteCount,int Gaussian[][5],unsigned char *src_imgbuf,unsigned char *guassian_imgbuf)
{
	//高斯模糊处理 5层循环处理
	for(int i=0;i<height;i++)
	{
		for(int j=0;j<width;j++)
		{
			for(int k=0;k<byteCount;k++)
			{
				int sum=0;//临时值
				int tempPixelValue=0;
				for (int m=-2;m<=2;m++)
				{
					for (int n=-2;n<=2;n++)
					{
						//边界处理,幽灵元素赋值为零
						if (i+m<0||j+n<0||i+m>=height||j+n>=width)						
							tempPixelValue=0;						 
						else						
							tempPixelValue=*(src_imgbuf+(i+m)*width*byteCount+(j+n)*byteCount+k);	
						//tempPixelValue=*(gray_imgbuf+(i+m)*width+(j+n)+k);	
						sum+=tempPixelValue*Gaussian[m+2][n+2];
					}
				}
				//tempPixelValue=*(src_imgbuf+(i)*width*byteCount+(j)*byteCount+k);
				if (sum/273<0) 
					*(guassian_imgbuf+i*width*byteCount+j*byteCount+k)=0;
				else if(sum/273>255)  
					*(guassian_imgbuf+i*width*byteCount+j*byteCount+k)=255;
				else  
					*(guassian_imgbuf+i*width*byteCount+j*byteCount+k)=sum/273;		
			}
		}
	}
}

//给定一个图像文件及其路径,读入图像数据。 
unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount)
{
	//打开文件,
	FILE *fp=fopen(bmpName,"rb");
	if(fp==0) return 0;
	//跳过文件头
	fseek(fp, sizeof(BITMAPFILEHEADER),0);

	//读入信息头
	int w, h, b;
	BITMAPINFOHEADER head;
	fread(&head, sizeof(BITMAPINFOHEADER), 1,fp); 
	w = head.biWidth;
	h = head.biHeight;
	b = head.biBitCount/8;
	int lineByte=(w * b+3)/4*4; //每行的字节数为4的倍数

	//跳过颜色表 (颜色表的大小为1024)(彩色图像并没有颜色表,不需要这一步)
	if(b==1)
		fseek(fp, 1024,1);

	//图像数据
	unsigned char *imgBuf=new unsigned char[w * h * b];
	for(int i=0;i<h;i++)
	{
		fread(imgBuf+i*w*b,w*b, 1,fp);
		fseek(fp, lineByte-w*b, 1);
	}
	fclose(fp);

	*width=w,  *height=h, *byteCount=b;

	return imgBuf;
}


bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount)
{
	if(!imgBuf)
		return 0;

	//灰度图像颜色表空间1024,彩色图像没有颜色表
	int palettesize=0;
	if(byteCount==1) palettesize=1024;

	//一行象素字节数为4的倍数
	int lineByte=(width * byteCount+3)/4*4;

	FILE *fp=fopen(bmpName,"wb");
	if(fp==0) return 0;

	//填写文件头
	BITMAPFILEHEADER fileHead;
	fileHead.bfType = 0x4D42;
	fileHead.bfSize= 
		sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+ palettesize + lineByte*height;
	fileHead.bfReserved1 = 0;
	fileHead.bfReserved2 = 0;
	fileHead.bfOffBits=54+palettesize;
	fwrite(&fileHead, sizeof(BITMAPFILEHEADER),1, fp);

	// 填写信息头
	BITMAPINFOHEADER head; 
	head.biBitCount=byteCount*8;
	head.biClrImportant=0;
	head.biClrUsed=0;
	head.biCompression=0;
	head.biHeight=height;
	head.biPlanes=1;
	head.biSize=40;
	head.biSizeImage=lineByte*height;
	head.biWidth=width;
	head.biXPelsPerMeter=0;
	head.biYPelsPerMeter=0;
	fwrite(&head, sizeof(BITMAPINFOHEADER),1, fp);

	//颜色表拷贝  
	if(palettesize==1024)
	{
		unsigned char palette[1024];
		for(int i=0;i<256;i++)
		{
			*(palette+i*4+0)=i;
			*(palette+i*4+1)=i;
			*(palette+i*4+2)=i;
			*(palette+i*4+3)=0;     
		}
		fwrite(palette, 1024,1, fp);
	}

	//准备数据并写文件
	unsigned char *buf=new unsigned char[height*lineByte];
	for(int i=0;i<height;i++)
	{
		for(int j=0;j<width*byteCount; j++)
			*(buf+i*lineByte+j)=*(imgBuf+i*width*byteCount+j);
	}
	fwrite(buf, height*lineByte, 1, fp);

	delete []buf;

	fclose(fp);

	return 1;
}



  • 1
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值