CUDA 1D纹理内存做高斯窗

最新推荐文章于 2024-01-04 00:02:38 发布

媛媛姐

最新推荐文章于 2024-01-04 00:02:38 发布

阅读量289

点赞数

文章标签： CUDA texture

本文链接：https://blog.csdn.net/qq_41361834/article/details/80623094

版权

纹理存储器属于只读存储器，但其拥有缓存机制，能通过缓存利用数据的局部性来提高效率。

在项目中需要对很多个大小相同的一维数组加高斯窗，所以将高斯窗存入纹理内存，以期提高效率。

下面是刚出炉热乎的简单1D纹理内存实例，作用是加高斯窗。

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
using namespace std;


// File-scope texture reference holding the Gaussian window: one float channel,
// one dimension, fetched as raw element values (cudaReadModeElementType).
// main() configures it (wrap addressing, linear filtering, normalized
// coordinates) and binds it to a CUDA array before launching gauss(), which
// samples it with tex1D().
// NOTE(review): texture references are a legacy API in newer CUDA toolkits;
// confirm the target toolkit version before reusing this pattern.
texture<float, 1, cudaReadModeElementType> tex;

/**
 * Applies the Gaussian window stored in the texture `tex` to a 1D signal:
 * pDst[i] = pSrc[i] * window[i] for 0 <= i < DividedLength.
 *
 * Launch layout: 1D grid of 1D blocks providing at least DividedLength
 * threads in total; surplus threads exit without touching memory.
 * Shared memory: none.
 * Preconditions: `tex` is bound to an array of DividedLength floats and uses
 * normalized coordinates; pSrc and pDst are device pointers to at least
 * DividedLength floats.
 *
 * @param pSrc          device input samples
 * @param pDst          device output samples
 * @param DividedLength number of samples (and of window texels)
 */
__global__ void gauss(float* pSrc, float* pDst,int DividedLength)
{
	// Global index, so the kernel also works when the data spans several blocks.
	const int idx = blockIdx.x * blockDim.x + threadIdx.x;
	if (idx >= DividedLength)
	{
		return;
	}

	// Normalized coordinate of the centre of texel idx. With linear filtering,
	// sampling exactly at the texel centre returns the stored value instead of
	// a blend of two neighbouring texels.
	const float u = ((float)idx + 0.5f) / (float)DividedLength;
	pDst[idx] = pSrc[idx] * tex1D(tex, u);
}
// Aborts with a readable message when a CUDA runtime call fails, so that an
// early failure is not silently carried into every later call.
static void checkCuda(cudaError_t status, const char* what)
{
	if (status != cudaSuccess)
	{
		fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
		exit(EXIT_FAILURE);
	}
}

/**
 * Demo driver: builds a Gaussian window on the host, uploads it into a CUDA
 * array bound to the texture `tex`, multiplies an all-ones signal by the
 * window on the device and prints both the window and the result.
 *
 * @return 0 on success, 1 if a host allocation fails; exits with EXIT_FAILURE
 *         if any CUDA call fails.
 */
int main()
{
	const int DividedLength = 256;
	const size_t bytes = sizeof(float) * DividedLength;

	float* hSrc = (float*)malloc(bytes);
	float* hDst = (float*)malloc(bytes);
	float* h_gauss = (float*)malloc(bytes);
	if (hSrc == NULL || hDst == NULL || h_gauss == NULL)
	{
		fprintf(stderr, "host allocation failed\n");
		free(hSrc);
		free(hDst);
		free(h_gauss);
		return 1;
	}

	float* pSrc = NULL;
	float* pDst = NULL;
	checkCuda(cudaMalloc((void **)(&pSrc), bytes), "cudaMalloc(pSrc)");
	checkCuda(cudaMalloc((void **)(&pDst), bytes), "cudaMalloc(pDst)");

	// All-ones input, so the output is the window itself.
	for (int i = 0; i < DividedLength; i++)
	{
		hSrc[i] = 1;
	}

	// Gaussian window centred on DividedLength / 2. Every texel is written, so
	// DividedLength no longer has to be a multiple of 2 * sigma; when it is,
	// the values are the same as with the symmetric [-N/2, N/2) loop.
	const int sigma = 8;
	const int center = DividedLength / 2;
	for (int k = 0; k < DividedLength; k++)
	{
		const int offset = k - center;
		h_gauss[k] = expf(-(float)(offset * offset) / (2.0f * (float)(sigma * sigma)));
		cout << h_gauss[k] << " ";
	}
	cout << endl;

	checkCuda(cudaMemcpy(pSrc, hSrc, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(pSrc)");

	// Upload the window into a 1D CUDA array (single 32-bit float channel).
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
	cudaArray *cuArray = NULL;
	checkCuda(cudaMallocArray(&cuArray, &channelDesc, DividedLength), "cudaMallocArray");
	// cudaMemcpyToArray is deprecated; a one-row 2D copy is the replacement.
	checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, h_gauss, bytes, bytes, 1, cudaMemcpyHostToDevice),
		"cudaMemcpy2DToArray");

	tex.addressMode[0] = cudaAddressModeWrap;
	tex.filterMode = cudaFilterModeLinear;
	tex.normalized = true;    // access with normalized texture coordinates

	// Bind the array to the texture
	checkCuda(cudaBindTextureToArray(tex, cuArray, channelDesc), "cudaBindTextureToArray");

	// One block with one thread per sample; DividedLength must therefore not
	// exceed the device's maximum block size (a bad configuration is reported
	// by the cudaGetLastError check below).
	dim3 block(DividedLength);
	dim3 grid(1);
	gauss << <grid, block >> > (pSrc, pDst, DividedLength);
	checkCuda(cudaGetLastError(), "gauss launch");
	checkCuda(cudaDeviceSynchronize(), "gauss execution");
	checkCuda(cudaMemcpy(hDst, pDst, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(hDst)");

	for (int i = 0; i < DividedLength; i++)
	{
		cout << hDst[i] << " ";
	}

	// Release the binding before the backing array is freed.
	checkCuda(cudaUnbindTexture(tex), "cudaUnbindTexture");
	checkCuda(cudaFree(pSrc), "cudaFree(pSrc)");
	checkCuda(cudaFree(pDst), "cudaFree(pDst)");
	checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
	free(h_gauss);
	free(hSrc);
	free(hDst);
	system("pause");
	return 0;
}

1.https://www.cnblogs.com/north-north/archive/2013/04/01/2993724.html中大神有言：texture纹理内存做数据寄存器是不可取的，实验证明其存取速度并不高。纹理内存更适于图像放缩及图像旋转处理。所以高斯窗做成常数存储器或者共享存储器更为合适。

2.+0.5f是必须的：纹理使用了线性滤波（cudaFilterModeLinear）和归一化坐标，只有在纹素中心 (i+0.5)/N 处采样才能取回原始数值；否则硬件会在相邻两个纹素之间自动做线性插值。

媛媛姐

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
CUDA 1D纹理内存做高斯窗

纹理存储器属于只读存储器，但其拥有缓存机制，能通过缓存利用数据的局部性来提高效率。这里在项目中需要对很多个大小相同一维数组加高斯窗，所以将高斯窗存入纹理内存，以期提高效率。下面是刚出炉热乎的简单1D纹理内存实例，作用是加高斯窗。#include "cuda_runtime.h"#include "device_launch_parameters.h"#include &lt;iostream&...
复制链接

扫一扫