第一个基于CUDA的简单中值滤波程序 VS2010+CUDA8.0

最新推荐文章于 2024-09-12 11:14:11 发布

选择和努力哪一个重要

最新推荐文章于 2024-09-12 11:14:11 发布

阅读量1k

点赞数 3

本文链接：https://blog.csdn.net/qq_29444571/article/details/81316036

版权

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "opencv2/video/tracking.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <time.h>
#include <iostream>
using namespace std;
using namespace cv;
#include <stdio.h>
// Edge length of the square thread block: main() launches the kernel with N x N threads per block.
// (This is the block size, not the image size; the image dimensions are read from the loaded image.)
#define N 32

// 3x3 median filter over a single-channel 8-bit image.
//
// Layout expectations:
//   - In / Out are device buffers of Width * Height bytes, row-major, with no
//     row padding (pixel (x, y) lives at index y * Width + x).
//   - Launch with a 2D grid of 2D blocks that covers at least Width x Height
//     threads; each in-range thread produces exactly one output pixel and
//     surplus threads exit without touching memory.
//   - Neighbours that fall outside the image are treated as 0 (zero padding).
//
// Parameters:
//   In     - source image (read only).
//   Out    - destination image; must not alias In.
//   Width  - image width in pixels.
//   Height - image height in pixels.
//   time   - device pointer to a single clock_t; receives the number of device
//            clock() ticks thread (0,0) spent inside the kernel. This is a
//            per-thread tick count, not the wall-clock duration of the launch.
__global__ void MediaFilter(uchar* In,uchar* Out, int Width, int Height, clock_t *time)
{
   const int x = (int)(blockIdx.x * blockDim.x + threadIdx.x);
   const int y = (int)(blockIdx.y * blockDim.y + threadIdx.y);

   // Only thread (0,0) takes part in the timing; it is always inside the image.
   const bool isTimingThread = (x == 0 && y == 0);
   clock_t start = 0;
   if (isTimingThread) start = clock();

   // The grid is rounded up to whole blocks, so a thread may be out of range in
   // either axis alone. Both cases must bail out before any memory access.
   if (x >= Width || y >= Height) return;

   // Gather the 3x3 neighbourhood in row-major order:
   //   window[0..2] = row y-1, window[3..5] = row y, window[6..8] = row y+1.
   uchar window[9];
   int k = 0;
#pragma unroll
   for (int dy = -1; dy <= 1; ++dy)
   {
#pragma unroll
       for (int dx = -1; dx <= 1; ++dx)
       {
           const int nx = x + dx;
           const int ny = y + dy;
           const bool inside = (nx >= 0 && nx < Width && ny >= 0 && ny < Height);
           window[k++] = inside ? In[(size_t)ny * Width + nx] : (uchar)0;
       }
   }

   // Partial selection sort: after placing the 5 smallest values in order,
   // window[4] holds the median of the 9 samples, so the rest need not be sorted.
   for (unsigned int j = 0; j < 5; ++j)
   {
       unsigned int minIdx = j;
       for (unsigned int l = j + 1; l < 9; ++l)
       {
           if (window[l] < window[minIdx])
           {
               minIdx = l;
           }
       }
       const uchar temp = window[j];
       window[j] = window[minIdx];
       window[minIdx] = temp;
   }

   Out[(size_t)y * Width + x] = window[4];

   if (isTimingThread) *time = clock() - start;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what)
{
   if (err == cudaSuccess) return true;
   fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
   return false;
}

// Loads a grayscale image, runs the 3x3 median filter on the GPU and with
// OpenCV's medianBlur, prints the timing of both and shows the results.
// Returns 0 on success, 1 if the image cannot be loaded or a CUDA call fails.
int main()
{
   Mat Img = imread("C:\\Users\\scczyy\\Desktop\\study\\ImageMedia\\lena.png",IMREAD_GRAYSCALE);
   if (Img.empty())
   {
       fprintf(stderr, "failed to load the input image\n");
       return 1;
   }
   // The copies below move rows * cols bytes in one piece, which is only valid
   // when the rows are stored back to back.
   if (!Img.isContinuous()) Img = Img.clone();

   imshow("原图",Img);

   const int Height = Img.rows;
   const int Width = Img.cols;
   // Computed in size_t so large images cannot overflow an int byte count.
   const size_t MemSize = (size_t)Height * (size_t)Width * sizeof(uchar);

   printf("data type is %i\n",Img.type());
   printf(" the image width is %i\n the image height is %i\n",Width,Height);

   uchar *dev_Img = NULL;
   uchar *dev_OutImg = NULL;
   clock_t *dev_Ticks = NULL;   // single clock_t written by thread (0,0) of the kernel

   clock_t startTime,endTime;
   clock_t startTime1,endTime1;

   // `ok` short-circuits: after the first failure no further CUDA work is issued.
   bool ok = cudaOk(cudaMalloc((void**)&dev_Img,MemSize), "cudaMalloc(dev_Img)");
   ok = ok && cudaOk(cudaMalloc((void**)&dev_OutImg,MemSize), "cudaMalloc(dev_OutImg)");
   ok = ok && cudaOk(cudaMalloc((void**)&dev_Ticks,sizeof(clock_t)), "cudaMalloc(dev_Ticks)");
   ok = ok && cudaOk(cudaMemcpy(dev_Img,Img.data,MemSize,cudaMemcpyHostToDevice), "cudaMemcpy(host -> dev_Img)");

   Mat OutImg = Mat::zeros(Img.rows,Img.cols, CV_8UC1);
   clock_t time_use = 0;

   if (ok)
   {
       // One thread per pixel; the grid is rounded up to whole N x N blocks.
       dim3 threadsPerBlock(N,N);
       dim3 blocks( (Width+threadsPerBlock.x-1)/threadsPerBlock.x, (Height+threadsPerBlock.y-1)/threadsPerBlock.y );

       startTime = clock();
       MediaFilter<<<blocks,threadsPerBlock>>>(dev_Img,dev_OutImg,Width,Height,dev_Ticks);
       // Launch-configuration errors are only visible through cudaGetLastError().
       ok = cudaOk(cudaGetLastError(), "MediaFilter launch");
       // Kernel launches are asynchronous: wait for completion before stopping the
       // host timer, otherwise only the launch overhead is measured.
       ok = ok && cudaOk(cudaDeviceSynchronize(), "MediaFilter execution");
       endTime = clock();
       if (ok)
       {
           cout << "CUDA Time Is " << (double)(endTime - startTime)/CLOCKS_PER_SEC <<"s"<<endl;
       }
   }

   ok = ok && cudaOk(cudaMemcpy(OutImg.data,dev_OutImg,MemSize,cudaMemcpyDeviceToHost), "cudaMemcpy(dev_OutImg -> host)");
   ok = ok && cudaOk(cudaMemcpy(&time_use,dev_Ticks,sizeof(clock_t),cudaMemcpyDeviceToHost), "cudaMemcpy(dev_Ticks -> host)");

   // Release every device buffer on all paths; cudaFree(NULL) is a no-op.
   cudaFree(dev_Img);
   cudaFree(dev_OutImg);
   cudaFree(dev_Ticks);

   if (!ok) return 1;

   // Device clock ticks spent by thread (0,0); cast so the format matches on every platform.
   printf("GPUtime: %lld\n",(long long)time_use);

   // CPU reference. medianBlur runs in place, so Img holds the filtered image afterwards.
   // NOTE(review): OpenCV's border handling may differ from the kernel's zero
   // padding, so border pixels of the two results need not match - confirm.
   startTime1 = clock();
   medianBlur(Img,Img,3);
   endTime1 = clock();
   cout << "OPENCV Time Is " << (double)(endTime1 - startTime1)/CLOCKS_PER_SEC <<"s"<<endl;

   imshow("CUDA",OutImg);
   imshow("OPENCV",Img);
   waitKey(0);
   return 0;
}
相比于 OpenCV，提速 10 倍以上（注意：CUDA 核函数的启动是异步的，只有在停止计时前调用 cudaDeviceSynchronize() 等待核函数执行完毕，这一对比才有意义）。