CUDA 纹理内存

//Sobel 边缘提取 使用纹理内存

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include<iostream>
#include"opencv2\opencv.hpp"
using namespace std;
using namespace cv;

// File-scope 2D texture reference over uchar texels, read as raw element values
// (cudaReadModeElementType). The host code binds a CUDA array to it before the
// kernel launch and the kernel samples it through tex2D().
texture<uchar,2,cudaReadModeElementType> tex;// 2D texture
// Device kernel: 3x3 Sobel edge magnitude, one thread per output pixel.
//
// The 3x3 neighbourhood is sampled from the file-scope texture reference `tex`,
// so what happens at the image border is decided by the texture's addressing
// mode rather than by explicit index clamping in this kernel.
//
//   pImgOut    - device output buffer; the pixel (x, y) is stored at x + y*nWidthStep
//   pImgIn     - device input buffer; not read here (input comes from `tex`)
//   nWidth     - image width in pixels
//   nHeight    - image height in pixels
//   nWidthStep - distance between consecutive output rows, in elements
//
// Expects a 2D launch whose grid covers at least nWidth x nHeight threads;
// threads that fall outside the image exit immediately.
__global__ void ImgFilter_3x3_m_kernel(uchar* pImgOut,
   uchar* pImgIn,
   int nWidth,
   int nHeight,
   int nWidthStep)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so discard the overhang.
    if (col >= nWidth || row >= nHeight)
    {
        return;
    }

    // Texture coordinates of the three columns and three rows of the window.
    const float xLeft  = (float)col - 1;
    const float xMid   = (float)col;
    const float xRight = (float)col + 1;
    const float yUp    = (float)row - 1;
    const float yMid   = (float)row;
    const float yDown  = (float)row + 1;

    // The eight neighbours, named by compass direction. The centre texel has
    // zero weight in both Sobel masks, so it is not fetched.
    const int nw = tex2D(tex, xLeft,  yUp);
    const int n  = tex2D(tex, xMid,   yUp);
    const int ne = tex2D(tex, xRight, yUp);
    const int w  = tex2D(tex, xLeft,  yMid);
    const int e  = tex2D(tex, xRight, yMid);
    const int sw = tex2D(tex, xLeft,  yDown);
    const int s  = tex2D(tex, xMid,   yDown);
    const int se = tex2D(tex, xRight, yDown);

    // Horizontal gradient: right column minus left column (weights 1, 2, 1).
    const int gradX = (ne + 2 * e + se) - (nw + 2 * w + sw);
    // Vertical gradient: top row minus bottom row (weights 1, 2, 1).
    const int gradY = (nw + 2 * n + ne) - (sw + 2 * s + se);

    // |Gx| + |Gy| approximates the gradient magnitude; saturate to 8 bits.
    const int magnitude = abs(gradX) + abs(gradY);
    pImgOut[col + row * nWidthStep] = (uchar)min(magnitude, 255);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host-side wrapper: uploads an 8-bit single-channel image, runs
// ImgFilter_3x3_m_kernel over it through the texture reference `tex`, prints the
// kernel time measured with CUDA events, and downloads the result.
//
//   pImgOut    - host output buffer, at least nWidthStep*nHeight bytes
//   pImgIn     - host input buffer, at least nWidthStep*nHeight bytes
//   nWidth     - image width in pixels
//   nHeight    - image height in pixels
//   nWidthStep - bytes between the starts of consecutive rows (>= nWidth)
//   nChannels  - must be 1: the texture is declared with a single uchar channel
//
// Returns 0 on success and -1 when the arguments are rejected or any CUDA call
// fails (a diagnostic is written to stderr). Every device resource acquired
// here is released before returning, on both paths.
double cudaImgMeanFilter(uchar* pImgOut,
 uchar* pImgIn,
 int nWidth,
 int nHeight,
 int nWidthStep,
 int nChannels)
{
    // The texture and the kernel address one uchar per pixel, so interleaved
    // multi-channel data cannot be processed correctly; reject it up front.
    if (pImgOut == NULL || pImgIn == NULL ||
        nWidth <= 0 || nHeight <= 0 ||
        nWidthStep < nWidth || nChannels != 1)
    {
        fprintf(stderr, "cudaImgMeanFilter: expected a non-empty single-channel 8-bit image\n");
        return -1;
    }

    // Computed in size_t so that large images do not overflow int arithmetic.
    const size_t nBytes = (size_t)nWidthStep * (size_t)nHeight * sizeof(uchar);

    cudaEvent_t start = NULL;
    cudaEvent_t stop = NULL;
    uchar* d_pImgInGPU = NULL;
    uchar* d_pImgOutGPU = NULL;
    cudaArray* cuArray = NULL;
    // Describes the CUDA array's element layout: one 8-bit unsigned component.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
    bool bTextureBound = false;
    bool bSucceeded = false;

    // Single-pass block: `break` jumps to the shared cleanup code below.
    do
    {
        // Events used to time the kernel.
        if (!cudaCallOk(cudaEventCreate(&start), "cudaEventCreate(start)")) break;
        if (!cudaCallOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")) break;

        // Linear device buffers. The input copy is kept because the kernel
        // signature takes it, even though the kernel samples from the texture.
        if (!cudaCallOk(cudaMalloc((void**)&d_pImgInGPU, nBytes), "cudaMalloc(input)")) break;
        if (!cudaCallOk(cudaMalloc((void**)&d_pImgOutGPU, nBytes), "cudaMalloc(output)")) break;
        if (!cudaCallOk(cudaMemcpy(d_pImgInGPU, pImgIn, nBytes, cudaMemcpyHostToDevice),
                        "cudaMemcpy(host to device)")) break;
        if (!cudaCallOk(cudaMemset(d_pImgOutGPU, 0, nBytes), "cudaMemset(output)")) break;

        // CUDA array backing the texture. The upload is pitch-aware: host rows
        // are nWidthStep bytes apart but only nWidth bytes of each row are
        // pixels, so a flat nWidth*nHeight copy would skew a padded image.
        if (!cudaCallOk(cudaMallocArray(&cuArray, &channelDesc, nWidth, nHeight),
                        "cudaMallocArray")) break;
        if (!cudaCallOk(cudaMemcpy2DToArray(cuArray, 0, 0, pImgIn,
                                            (size_t)nWidthStep,
                                            (size_t)nWidth * sizeof(uchar),
                                            (size_t)nHeight,
                                            cudaMemcpyHostToDevice),
                        "cudaMemcpy2DToArray")) break;

        // State the sampling behaviour the kernel relies on: integer pixel
        // coordinates, nearest-texel reads, and clamping at the image border.
        tex.normalized = 0;
        tex.filterMode = cudaFilterModePoint;
        tex.addressMode[0] = cudaAddressModeClamp;
        tex.addressMode[1] = cudaAddressModeClamp;

        // Bind the array to the texture reference.
        if (!cudaCallOk(cudaBindTextureToArray(&tex, cuArray, &channelDesc),
                        "cudaBindTextureToArray")) break;
        bTextureBound = true;

        if (!cudaCallOk(cudaEventRecord(start, 0), "cudaEventRecord(start)")) break;

        // One thread per pixel; the grid is rounded up to whole 16x16 blocks
        // and the kernel discards threads outside the image.
        dim3 threads(16, 16);
        dim3 grid((nWidth + threads.x - 1) / threads.x,
                  (nHeight + threads.y - 1) / threads.y);
        ImgFilter_3x3_m_kernel<<<grid, threads>>>(d_pImgOutGPU, d_pImgInGPU,
                                                  nWidth, nHeight, nWidthStep);
        // A launch does not return a status; configuration errors show up here.
        if (!cudaCallOk(cudaGetLastError(), "ImgFilter_3x3_m_kernel launch")) break;

        if (!cudaCallOk(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")) break;
        // Waiting on `stop` also surfaces faults raised while the kernel ran.
        if (!cudaCallOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")) break;

        float elapsedTime = 0.0f;
        if (!cudaCallOk(cudaEventElapsedTime(&elapsedTime, start, stop),
                        "cudaEventElapsedTime")) break;
        printf("Time to generate:%3.1f ms\n", elapsedTime);

        // Download the filtered image.
        if (!cudaCallOk(cudaMemcpy(pImgOut, d_pImgOutGPU, nBytes, cudaMemcpyDeviceToHost),
                        "cudaMemcpy(device to host)")) break;

        bSucceeded = true;
    } while (0);

    // Shared cleanup: release only what was actually acquired.
    if (bTextureBound) cudaUnbindTexture(tex);
    if (cuArray != NULL) cudaFreeArray(cuArray);
    if (d_pImgInGPU != NULL) cudaFree(d_pImgInGPU);
    if (d_pImgOutGPU != NULL) cudaFree(d_pImgOutGPU);
    if (start != NULL) cudaEventDestroy(start);
    if (stop != NULL) cudaEventDestroy(stop);

    return bSucceeded ? 0 : -1;
}

// Entry point: loads a test image, runs the GPU filter on it and shows the
// source and the result side by side until a key is pressed.
// Returns -1 when the image cannot be loaded, 0 otherwise.
int main()
{
    // Flag 0 asks imread for a single-channel grayscale image.
    Mat srcImage = imread("D:\\Images\\lenna.bmp", 0);
    if (srcImage.empty())
    {
        cout << "Can not load image" << endl;
        return -1;
    }

    // The destination starts as a copy of the source, which gives it the same
    // size and type.
    Mat dstImage = srcImage.clone();

    // Geometry handed to the GPU wrapper.
    const int imageCols = srcImage.cols;
    const int imageRows = srcImage.rows;
    const int rowStep = srcImage.step;
    const int channelCount = srcImage.channels();

    // Filter on the GPU; the result is written straight into dstImage's pixels.
    cudaImgMeanFilter(dstImage.data, srcImage.data,
                      imageCols, imageRows, rowStep, channelCount);

    // Display both images and wait for a key press.
    imshow("ImgIn", srcImage);
    imshow("ImgOut", dstImage);
    waitKey(0);
    return 0;
}

速度反而变慢了!!!

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值