CUDA卷积操作—使用constant memory实现高斯滤波

先转载一篇CUDA卷积的实现,对比一下GPU与CPU中的代码有什么不同:

原文地址:CUDA卷积操作—使用constant memory实现高斯滤波

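高斯滤波就是使用高斯模板和图片进行卷积运算。二维高斯函数为
$$G(x,y)=\frac{1}{2\pi\sigma^{2}}\,e^{-\frac{x^{2}+y^{2}}{2\sigma^{2}}}$$
本文代码使用的 5×5 整数高斯模板如下(各系数之和为 273,因此卷积结果需要除以 273 进行归一化):
$$\frac{1}{273}\begin{bmatrix}1&4&7&4&1\\4&16&26&16&4\\7&26&41&26&7\\4&16&26&16&4\\1&4&7&4&1\end{bmatrix}$$
(原文此处为示意图,图片已缺失。)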

卷积前后的效果图如下:

(原文此处为卷积前后的效果对比图,图片已缺失。)

constant memory的使用及CUDA编程的相关内容,在代码注释中有详细介绍。
GPU代码如下所示:

#include "cuda_runtime.h"  
#include "device_launch_parameters.h"  

#include <stdio.h>  
#include "Windows.h"  
#include <math.h>  
#include<iostream>  
using namespace std;  

// Thread-block dimensions used for the kernel launch in main().
#define BLOCKDIM_X      16  
#define BLOCKDIM_Y      16  

// Upper bounds that main() applies to the grid dimensions before launching.
#define GRIDDIM_X       256  
#define GRIDDIM_Y       256  
// Side length of the square Gaussian mask; the constant array below holds
// MASK_WIDTH*MASK_WIDTH coefficients.
#define MASK_WIDTH      5  

// Gaussian mask coefficients in constant memory; main() fills this array
// with cudaMemcpyToSymbol and the kernel reads it.
__constant__ int d_const_Gaussian[MASK_WIDTH*MASK_WIDTH];

// Reads a BMP file into a newly allocated buffer and reports its width,
// height and bytes per pixel; returns 0 when the file cannot be opened.
unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount);  
// Writes a tightly packed pixel buffer to a BMP file.
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount);  
// Device kernel that applies the Gaussian mask to d_src_imgbuf and stores the
// result in d_guassian_imgbuf.
static __global__ void kernel_GaussianFilt(int width,int height,int byteCount,unsigned char *d_src_imgbuf,unsigned char *d_guassian_imgbuf);  

// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// GPU driver: for each input frame, reads <rootPath1><k>.bmp, blurs it on the
// device with kernel_GaussianFilt and writes <rootPath2><k>.bmp, then prints
// the elapsed wall-clock time in milliseconds.
// Returns 0 when every frame was processed, 1 if any step failed.
int main()
{
    // Query device 0 and show its per-block thread limit.
    struct cudaDeviceProp pror;
    if (!cudaOk(cudaGetDeviceProperties(&pror,0), "cudaGetDeviceProperties"))
        return 1;
    cout<<"maxThreadsPerBlock="<<pror.maxThreadsPerBlock<<endl;

    // Wall-clock timing covers file I/O, transfers and the kernel.
    const DWORD start = GetTickCount();

    // The mask never changes, so upload it to constant memory once instead of
    // once per frame. The coefficients sum to 273.
    int Gaussian[MASK_WIDTH*MASK_WIDTH] = {1,4,7,4,1,
                                           4,16,26,16,4,
                                           7,26,41,26,7,
                                           4,16,26,16,4,
                                           1,4,7,4,1};
    if (!cudaOk(cudaMemcpyToSymbol(d_const_Gaussian, Gaussian, sizeof(Gaussian)), "cudaMemcpyToSymbol"))
        return 1;

    char rootPath1[]="C:\\Users\\a404\\Desktop\\测试图片\\";
    char rootPath2[]="C:\\Users\\a404\\Desktop\\测试结果\\";
    char readPath[1024];
    char writePath[1024];
    int frame=1;
    int status=0;

    for (int k=1;k<=frame;k++)
    {
        sprintf(readPath, "%s%d.bmp", rootPath1, k);

        int width=0, height=0, byteCount=0;
        unsigned char *h_src_imgbuf=readBmp(readPath, &width, &height, &byteCount);
        if (!h_src_imgbuf || width<=0 || height<=0 || byteCount<=0)
        {
            // readBmp returns 0 on failure; without this check the copy to
            // the device below would dereference a null pointer.
            fprintf(stderr, "Failed to read %s\n", readPath);
            delete []h_src_imgbuf;
            status=1;
            continue;
        }

        const size_t imageBytes=(size_t)width*height*byteCount;

        // Host-side output image.
        unsigned char *h_guassian_imgbuf=new unsigned char[imageBytes];

        // Device-side input and output images.
        unsigned char *d_src_imgbuf=0;
        unsigned char *d_guassian_imgbuf=0;

        bool ok = cudaOk(cudaMalloc((void**)&d_src_imgbuf, imageBytes), "cudaMalloc(src)")
               && cudaOk(cudaMalloc((void**)&d_guassian_imgbuf, imageBytes), "cudaMalloc(dst)")
               && cudaOk(cudaMemcpy(d_src_imgbuf, h_src_imgbuf, imageBytes, cudaMemcpyHostToDevice), "cudaMemcpy(H2D)");

        if (ok)
        {
            // One thread per pixel, rounded up to whole blocks and capped at
            // GRIDDIM_X x GRIDDIM_Y; the kernel's strided loops cover any
            // pixels beyond the cap.
            int bx = (width  + BLOCKDIM_X - 1) / BLOCKDIM_X;
            int by = (height + BLOCKDIM_Y - 1) / BLOCKDIM_Y;
            if(bx > GRIDDIM_X) bx = GRIDDIM_X;
            if(by > GRIDDIM_Y) by = GRIDDIM_Y;

            dim3 grid(bx, by);
            dim3 block(BLOCKDIM_X, BLOCKDIM_Y);

            kernel_GaussianFilt<<<grid, block>>>(width,height,byteCount,d_src_imgbuf,d_guassian_imgbuf);

            // Launch-configuration errors show up in cudaGetLastError();
            // execution errors surface at the blocking copy that follows.
            ok = cudaOk(cudaGetLastError(), "kernel_GaussianFilt launch")
              && cudaOk(cudaMemcpy(h_guassian_imgbuf, d_guassian_imgbuf, imageBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(D2H)");
        }

        if (ok)
        {
            sprintf(writePath, "%s%d.bmp", rootPath2, k);
            if (!saveBmp(writePath, h_guassian_imgbuf, width, height, byteCount))
            {
                fprintf(stderr, "Failed to write %s\n", writePath);
                status=1;
            }
        }
        else
        {
            status=1;
        }

        // Progress report.
        cout<<k<<"  "<<((float)k/frame)*100<<"%"<<endl;

        // Release per-frame resources (cudaFree accepts a null pointer).
        cudaFree(d_src_imgbuf);
        cudaFree(d_guassian_imgbuf);

        delete []h_src_imgbuf;
        delete []h_guassian_imgbuf;
    }

    const long time = (long)(GetTickCount() - start);
    cout << "Total time GPU:";
    cout << time << endl;

    // Keep the console window open until the user enters something.
    int x;
    cin>>x;
    return status;
}

// Applies the MASK_WIDTH x MASK_WIDTH integer mask stored in d_const_Gaussian
// to an interleaved image and writes the normalised result.
//
//   width, height  image size in pixels
//   byteCount      bytes (channels) per pixel; each channel is filtered
//                  independently
//   d_src_imgbuf   input image, height rows of width*byteCount bytes
//   d_dst_imgbuf   output image with the same layout
//
// Launch layout: any 2-D grid of 2-D blocks. The x dimension walks image
// columns and the y dimension walks image rows; both loops advance by the
// total number of launched threads in that dimension, so a grid smaller than
// the image still covers every pixel. Mapping threadIdx.x to columns makes
// neighbouring threads touch neighbouring bytes of the same row.
// Taps that fall outside the image contribute zero.
static __global__ void kernel_GaussianFilt(int width,int height,int byteCount,unsigned char *d_src_imgbuf,unsigned char *d_dst_imgbuf)
{
    const int radius     = MASK_WIDTH / 2;
    const int normalizer = 273;                 // sum of the mask coefficients uploaded by the host
    const int rowPitch   = width * byteCount;   // bytes per image row

    const int colStart = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStart = blockDim.y * blockIdx.y + threadIdx.y;

    const int colStep = blockDim.x * gridDim.x;
    const int rowStep = blockDim.y * gridDim.y;

    for (int row = rowStart; row < height; row += rowStep)
    {
        for (int col = colStart; col < width; col += colStep)
        {
            for (int k = 0; k < byteCount; k++)
            {
                int sum = 0;
                for (int m = -radius; m <= radius; m++)
                {
                    const int r = row + m;
                    if (r < 0 || r >= height)
                        continue;               // whole mask row lies outside the image
                    for (int n = -radius; n <= radius; n++)
                    {
                        const int c = col + n;
                        if (c < 0 || c >= width)
                            continue;           // out-of-image tap counts as zero
                        sum += d_src_imgbuf[r * rowPitch + c * byteCount + k]
                             * d_const_Gaussian[(m + radius) * MASK_WIDTH + (n + radius)];
                    }
                }

                // Normalise and clamp to the unsigned char range.
                int value = sum / normalizer;
                if (value < 0)
                    value = 0;
                else if (value > 255)
                    value = 255;
                d_dst_imgbuf[row * rowPitch + col * byteCount + k] = (unsigned char)value;
            }
        }
    }
}

// Loads the pixel data of a BMP file into a newly allocated, tightly packed
// buffer (no per-row padding). Rows are stored in the order they appear in
// the file.
//
//   bmpName    path of the file to read
//   width      receives the image width in pixels
//   height     receives the image height in pixels
//   byteCount  receives the number of bytes per pixel (biBitCount / 8)
//
// Returns a buffer of width*height*byteCount bytes that the caller must
// release with delete[], or 0 when the file cannot be opened, a header or
// pixel row cannot be read, or the header describes an image this reader
// cannot hold (non-positive width/height, or fewer than 8 bits per pixel).
// The output parameters are only written on success.
unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount)
{
    FILE *fp=fopen(bmpName,"rb");
    if(fp==0) return 0;

    // Skip the file header and read the info header that follows it.
    BITMAPINFOHEADER head;
    if (fseek(fp, sizeof(BITMAPFILEHEADER), SEEK_SET)!=0
        || fread(&head, sizeof(BITMAPINFOHEADER), 1, fp)!=1)
    {
        fclose(fp);
        return 0;
    }

    const int w = head.biWidth;
    const int h = head.biHeight;
    const int b = head.biBitCount/8;

    // A negative biHeight or a bit depth below 8 would otherwise produce a
    // zero or negative allocation size below.
    if (w<=0 || h<=0 || b<=0)
    {
        fclose(fp);
        return 0;
    }

    const int rowBytes = w * b;
    const int lineByte = (rowBytes+3)/4*4; // rows in the file are padded to a multiple of 4 bytes
    const int padBytes = lineByte-rowBytes;

    // 8-bit images carry a 1024-byte colour table before the pixel data;
    // multi-byte pixel formats have none.
    if (b==1 && fseek(fp, 1024, SEEK_CUR)!=0)
    {
        fclose(fp);
        return 0;
    }

    unsigned char *imgBuf=new unsigned char[(size_t)rowBytes * h];
    for(int i=0;i<h;i++)
    {
        if (fread(imgBuf+(size_t)i*rowBytes, rowBytes, 1, fp)!=1)
        {
            // Truncated file: do not hand back a partially filled image.
            delete []imgBuf;
            fclose(fp);
            return 0;
        }
        if (padBytes>0)
            fseek(fp, padBytes, SEEK_CUR);
    }
    fclose(fp);

    *width=w;
    *height=h;
    *byteCount=b;

    return imgBuf;
}


// Writes a tightly packed pixel buffer as an uncompressed BMP file.
//
//   bmpName    path of the file to create or overwrite
//   imgBuf     height rows of width*byteCount bytes, without row padding
//   width      image width in pixels
//   height     image height in pixels
//   byteCount  bytes per pixel; 1 additionally writes a 256-entry grey
//              colour table
//
// Returns true when the whole file was written, false when imgBuf is null,
// a dimension is not positive, the file cannot be opened, or a write fails.
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount)
{
    if(!imgBuf || width<=0 || height<=0 || byteCount<=0)
        return false;

    // Single-byte images need a 1024-byte colour table; others have none.
    const int palettesize = (byteCount==1) ? 1024 : 0;

    // Each row in the file is padded to a multiple of 4 bytes.
    const int rowBytes = width * byteCount;
    const int lineByte = (rowBytes+3)/4*4;
    const int padBytes = lineByte-rowBytes;

    FILE *fp=fopen(bmpName,"wb");
    if(fp==0) return false;

    // File header.
    BITMAPFILEHEADER fileHead;
    fileHead.bfType = 0x4D42;
    fileHead.bfSize=
        sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+ palettesize + lineByte*height;
    fileHead.bfReserved1 = 0;
    fileHead.bfReserved2 = 0;
    fileHead.bfOffBits=sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+palettesize;

    // Info header.
    BITMAPINFOHEADER head;
    head.biBitCount=byteCount*8;
    head.biClrImportant=0;
    head.biClrUsed=0;
    head.biCompression=0;
    head.biHeight=height;
    head.biPlanes=1;
    head.biSize=40;
    head.biSizeImage=lineByte*height;
    head.biWidth=width;
    head.biXPelsPerMeter=0;
    head.biYPelsPerMeter=0;

    bool ok = fwrite(&fileHead, sizeof(BITMAPFILEHEADER), 1, fp)==1
           && fwrite(&head, sizeof(BITMAPINFOHEADER), 1, fp)==1;

    // Grey ramp colour table: entry i maps to (i, i, i, 0).
    if(ok && palettesize==1024)
    {
        unsigned char palette[1024];
        for(int i=0;i<256;i++)
        {
            palette[i*4+0]=(unsigned char)i;
            palette[i*4+1]=(unsigned char)i;
            palette[i*4+2]=(unsigned char)i;
            palette[i*4+3]=0;
        }
        ok = fwrite(palette, 1024, 1, fp)==1;
    }

    // Write each row straight from the source followed by explicit zero
    // padding, so no uninitialised bytes reach the file and no full-image
    // staging copy is needed.
    const unsigned char padding[3]={0,0,0};
    for(int i=0; ok && i<height; i++)
    {
        ok = fwrite(imgBuf+(size_t)i*rowBytes, rowBytes, 1, fp)==1;
        if(ok && padBytes>0)
            ok = fwrite(padding, padBytes, 1, fp)==1;
    }

    // fclose flushes buffered data, so its failure also means a bad file.
    if(fclose(fp)!=0)
        ok=false;

    return ok;
}

附录:高斯滤波CPU代码

#include "stdio.h"  
#include "Windows.h"  
#include <iostream>  
using namespace std;  

unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount); // read an image from a BMP file
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount); // save an image to a BMP file
void GaussianFilt(int width,int height,int byteCount,int Gaussian[][5],unsigned char *gray_imgbuf,unsigned char *guassian_imgbuf); // Gaussian filter

void main()  
{  
    //计时函数  
    long start, end;  
    long time = 0;   
    start = GetTickCount();  

    unsigned char *src_imgbuf; //图像指针  
    int width, height, byteCount;  
    char rootPath1[]="C:\\Users\\a404\\Desktop\\测试图片\\";  
    char readPath[1024];  
    int frame=300;  //读入图像副数  
    for (int i=1;i<=frame;i++)  
    {  
        sprintf(readPath, "%s%d.bmp", rootPath1, i);  
        src_imgbuf=readBmp(readPath, &width, &height, &byteCount);  
        //printf("宽=%d,高=%d,字节=%d\n",width, height, byteCount);  

        //读入高斯模糊模板  
        int Gaussian_mask[5][5]={{1,4,7,4,1},{4,16,26,16,4},{7,26,41,26,7},{4,16,26,16,4},{1,4,7,4,1}};//总和为273  

        //输出图像内存分配    
        unsigned char *guassian_imgbuf=new unsigned char[width*height*byteCount];  

        //对原图高斯模糊  
        GaussianFilt(width,height,byteCount,Gaussian_mask,src_imgbuf,guassian_imgbuf);  

        char rootPath2[]="C:\\Users\\a404\\Desktop\\";  
        char writePath[1024];  
        sprintf(writePath, "%s%d.bmp", rootPath2, i);  

        saveBmp(writePath, guassian_imgbuf, width, height, byteCount);  

        cout<<i<<"  "<<((float)i/frame)*100<<"%"<<endl;  
        delete []src_imgbuf;  
        delete []guassian_imgbuf;  
    }  
    end = GetTickCount();  
    InterlockedExchangeAdd(&time, end - start);  
    cout << "Total time CPU:";  
    cout << time << endl;  
    int x;  
    cin>>x;  
}  

// Convolves an interleaved image with a 5x5 integer mask and divides each
// weighted sum by 273 before storing it.
//
//   width, height    image size in pixels
//   byteCount        bytes (channels) per pixel; channels are filtered
//                    independently
//   Gaussian         5x5 mask of integer weights
//   src_imgbuf       input, height rows of width*byteCount bytes
//   guassian_imgbuf  output with the same layout
//
// Mask taps that fall outside the image contribute zero.
void GaussianFilt(int width,int height,int byteCount,int Gaussian[][5],unsigned char *src_imgbuf,unsigned char *guassian_imgbuf)
{
    const int pitch = width*byteCount;   // bytes per image row

    for (int row=0; row<height; ++row)
    {
        unsigned char *outRow = guassian_imgbuf + row*pitch;

        for (int col=0; col<width; ++col)
        {
            for (int ch=0; ch<byteCount; ++ch)
            {
                int acc = 0;
                for (int dy=-2; dy<=2; ++dy)
                {
                    const int y = row+dy;
                    for (int dx=-2; dx<=2; ++dx)
                    {
                        const int x = col+dx;
                        const bool inside = (y>=0 && x>=0 && y<height && x<width);
                        // Neighbours beyond the border are treated as zero.
                        const int sample = inside ? src_imgbuf[y*pitch + x*byteCount + ch] : 0;
                        acc += sample*Gaussian[dy+2][dx+2];
                    }
                }

                // Normalise, then clamp into the unsigned char range.
                int scaled = acc/273;
                if (scaled<0)
                    scaled = 0;
                else if (scaled>255)
                    scaled = 255;
                outRow[col*byteCount + ch] = (unsigned char)scaled;
            }
        }
    }
}

// Loads the pixel data of a BMP file into a newly allocated, tightly packed
// buffer (no per-row padding). Rows are stored in the order they appear in
// the file.
//
//   bmpName    path of the file to read
//   width      receives the image width in pixels
//   height     receives the image height in pixels
//   byteCount  receives the number of bytes per pixel (biBitCount / 8)
//
// Returns a buffer of width*height*byteCount bytes that the caller must
// release with delete[], or 0 when the file cannot be opened, a header or
// pixel row cannot be read, or the header describes an image this reader
// cannot hold (non-positive width/height, or fewer than 8 bits per pixel).
// The output parameters are only written on success.
unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount)
{
    FILE *fp=fopen(bmpName,"rb");
    if(fp==0) return 0;

    // Skip the file header and read the info header that follows it.
    BITMAPINFOHEADER head;
    if (fseek(fp, sizeof(BITMAPFILEHEADER), SEEK_SET)!=0
        || fread(&head, sizeof(BITMAPINFOHEADER), 1, fp)!=1)
    {
        fclose(fp);
        return 0;
    }

    const int w = head.biWidth;
    const int h = head.biHeight;
    const int b = head.biBitCount/8;

    // A negative biHeight or a bit depth below 8 would otherwise produce a
    // zero or negative allocation size below.
    if (w<=0 || h<=0 || b<=0)
    {
        fclose(fp);
        return 0;
    }

    const int rowBytes = w * b;
    const int lineByte = (rowBytes+3)/4*4; // rows in the file are padded to a multiple of 4 bytes
    const int padBytes = lineByte-rowBytes;

    // 8-bit images carry a 1024-byte colour table before the pixel data;
    // multi-byte pixel formats have none.
    if (b==1 && fseek(fp, 1024, SEEK_CUR)!=0)
    {
        fclose(fp);
        return 0;
    }

    unsigned char *imgBuf=new unsigned char[(size_t)rowBytes * h];
    for(int i=0;i<h;i++)
    {
        if (fread(imgBuf+(size_t)i*rowBytes, rowBytes, 1, fp)!=1)
        {
            // Truncated file: do not hand back a partially filled image.
            delete []imgBuf;
            fclose(fp);
            return 0;
        }
        if (padBytes>0)
            fseek(fp, padBytes, SEEK_CUR);
    }
    fclose(fp);

    *width=w;
    *height=h;
    *byteCount=b;

    return imgBuf;
}


// Writes a tightly packed pixel buffer as an uncompressed BMP file.
//
//   bmpName    path of the file to create or overwrite
//   imgBuf     height rows of width*byteCount bytes, without row padding
//   width      image width in pixels
//   height     image height in pixels
//   byteCount  bytes per pixel; 1 additionally writes a 256-entry grey
//              colour table
//
// Returns true when the whole file was written, false when imgBuf is null,
// a dimension is not positive, the file cannot be opened, or a write fails.
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount)
{
    if(!imgBuf || width<=0 || height<=0 || byteCount<=0)
        return false;

    // Single-byte images need a 1024-byte colour table; others have none.
    const int palettesize = (byteCount==1) ? 1024 : 0;

    // Each row in the file is padded to a multiple of 4 bytes.
    const int rowBytes = width * byteCount;
    const int lineByte = (rowBytes+3)/4*4;
    const int padBytes = lineByte-rowBytes;

    FILE *fp=fopen(bmpName,"wb");
    if(fp==0) return false;

    // File header.
    BITMAPFILEHEADER fileHead;
    fileHead.bfType = 0x4D42;
    fileHead.bfSize=
        sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+ palettesize + lineByte*height;
    fileHead.bfReserved1 = 0;
    fileHead.bfReserved2 = 0;
    fileHead.bfOffBits=sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+palettesize;

    // Info header.
    BITMAPINFOHEADER head;
    head.biBitCount=byteCount*8;
    head.biClrImportant=0;
    head.biClrUsed=0;
    head.biCompression=0;
    head.biHeight=height;
    head.biPlanes=1;
    head.biSize=40;
    head.biSizeImage=lineByte*height;
    head.biWidth=width;
    head.biXPelsPerMeter=0;
    head.biYPelsPerMeter=0;

    bool ok = fwrite(&fileHead, sizeof(BITMAPFILEHEADER), 1, fp)==1
           && fwrite(&head, sizeof(BITMAPINFOHEADER), 1, fp)==1;

    // Grey ramp colour table: entry i maps to (i, i, i, 0).
    if(ok && palettesize==1024)
    {
        unsigned char palette[1024];
        for(int i=0;i<256;i++)
        {
            palette[i*4+0]=(unsigned char)i;
            palette[i*4+1]=(unsigned char)i;
            palette[i*4+2]=(unsigned char)i;
            palette[i*4+3]=0;
        }
        ok = fwrite(palette, 1024, 1, fp)==1;
    }

    // Write each row straight from the source followed by explicit zero
    // padding, so no uninitialised bytes reach the file and no full-image
    // staging copy is needed.
    const unsigned char padding[3]={0,0,0};
    for(int i=0; ok && i<height; i++)
    {
        ok = fwrite(imgBuf+(size_t)i*rowBytes, rowBytes, 1, fp)==1;
        if(ok && padBytes>0)
            ok = fwrite(padding, padBytes, 1, fp)==1;
    }

    // fclose flushes buffered data, so its failure also means a bad file.
    if(fclose(fp)!=0)
        ok=false;

    return ok;
}
  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
1. 主要工作: 基于MATLAB图像处理的中值滤波、均值滤波以及高斯滤波实现与对比: a) 中值滤波法是一种非线性平滑技术,它将每一像素点的灰度值设置为该点某邻域窗口内的所有像素点灰度值的中值. b) 均值滤波是典型的线性滤波算法,它是指在图像上对目标像素给一个模板,该模板包括了其周围的临近像素(以目标像素为中心的周围8个像素,构成一个滤波模板,即去掉目标像素本身),再用模板中的全体像素的平均值来代替原来像素值。 c) 高斯滤波是一种线性平滑滤波,适用于消除高斯噪声,广泛应用于图像处理的减噪过程。通俗的讲,高斯滤波就是对整幅图像进行加权平均的过程,每一个像素点的值,都由其本身和邻域内的其他像素值经过加权平均后得到。高斯滤波的具体操作是:用一个模板(或称卷积、掩模)扫描图像中的每一个像素,用模板确定的邻域内像素的加权平均灰度值去替代模板中心像素点的值。 2. 代码功能: 实现中值滤波、均值滤波以及高斯滤波,并对图像进行输出 3. 结果分析 a) 图像经过中值滤波后,高斯噪声没有被完全去除,椒盐噪声几乎被完全去除效果较好。经过均值滤波后不管是高斯噪声还是椒盐噪声大部分都没有被去除,只是稍微模糊化。经过高斯滤波后,高斯噪声和椒盐噪声几乎被很大程度的模糊化,原图好像被加上了一层蒙版。 【注】若添加图片分辨率过高会发出警报,如果可以正常输出则可以忽视。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值