CUDA：纹理内存入门到精通--纹理参考

最新推荐文章于 2025-03-11 17:05:02 发布

yanqswhu

最新推荐文章于 2025-03-11 17:05:02 发布

阅读量3.8k

点赞数 5

分类专栏： CUDA

本文链接：https://blog.csdn.net/u012348774/article/details/79138901

版权

CUDA 专栏收录该内容

17 篇文章

订阅专栏

导读

CUDA：纹理内存入门到精通–纹理内存概要
 CUDA：纹理内存入门到精通–纹理参考
 CUDA：纹理内存入门到精通–纹理对象

背景

上一篇博客已经完整地介绍了纹理内存的优点，本博客主要介绍如何通过纹理参考的方式使用纹理内存。

纹理参考

纹理参考是使用纹理内存的一种方式，也是CUDA早期支持的一种版本。纹理参考大概可以分为一维纹理参考和二维纹理参考。两者的作用完全相同，达到的加速效果也十分接近。但是考虑到在使用纹理内存时，通常是需要对图片进行处理，那么二维纹理内存使用起来相对更加方便，其具有以下两个优点：

自动处理边界问题，无需判断采样点是否超出边界
自带最邻近插值和双线性插值功能。

纹理参考API

纹理参考的API主要包括纹理参考声明、纹理参考绑定、纹理参考使用和纹理参考解绑。

###### 纹理参考声明
纹理参考的声明方式如下，其必须声明为全局变量，且不可更改。

texture<DataType, Type, ReadMode> texRef;
// DataType: element type of the texture, e.g. uchar or float.
//   NOTE(review): the original note also listed double here; double does not
//   look like a directly supported texel type - confirm against the CUDA docs.
// Type: dimensionality of the texture. Only 2D textures are covered here, so
//   Type=2 (a 1D texture would use Type=1).
// ReadMode: how texels are returned; either cudaReadModeNormalizedFloat or
//   cudaReadModeElementType.
// Reminder: cudaFilterModeLinear must be used with float results, so integer
//   texel types have to be read with cudaReadModeNormalizedFloat.
//   NOTE(review): the original text also said "texture references only have
//   the cudaReadModeElementType mode", which contradicts the two modes listed
//   above - treat that sentence as suspect until confirmed.
// cudaReadModeNormalizedFloat: char/uchar (8-bit or 16-bit integer) texels are
//   returned as normalized floating-point values.
// cudaReadModeElementType: texels are returned in their original type.

纹理参考还有一些属性可以在主机端进行修改，如下：

// Host-modifiable attributes of a texture reference (excerpt; remaining
// members are elided with "...").
struct textureReference 
{ 
    // Whether texture coordinates are normalized
    int normalized; 
    // Texture filtering mode:
    // cudaFilterModePoint (nearest-neighbour) or cudaFilterModeLinear (bilinear interpolation)
    // Reminder: cudaFilterModeLinear must be used together with float
    enum cudaTextureFilterMode filterMode; 
    // How accesses outside the texture bounds are handled.
    // This is a 3-element array, one entry per dimension (texture references support at most 3D)
    // cudaAddressModeClamp: out-of-range coordinates are clamped to the min/max, i.e. the edge pixel is returned
    // cudaAddressModeBorder: coordinates outside the image return 0
    // cudaAddressModeWrap / cudaAddressModeMirror: the image is treated as periodic; Mirror additionally mirrors it
    enum cudaTextureAddressMode addressMode[3]; 
    // Describes the channels of a texel; the descriptor is defined as
    //struct cudaChannelFormatDesc { int x, y, z, w; enum cudaChannelFormatKind f; };
    // x, y, z, w give the size of the R, G, B and alpha channels.
    // NOTE(review): the original text called these byte counts, but the values it
    // quotes (8 for integer, 32 for float, 64 for double) look like bit widths - confirm.
    // cudaChannelFormatKind refines how each channel is interpreted:
    //   cudaChannelFormatKindSigned   -> signed int
    //   cudaChannelFormatKindUnsigned -> unsigned int
    //   cudaChannelFormatKindFloat    -> floating point
    struct cudaChannelFormatDesc channelDesc;  
    ... 
}

###### 纹理参考绑定
纹理参考在使用时必须要进行绑定。针对线性内存可以使用 cudaBindTexture() 和 cudaBindTexture2D()；针对二维内存（CUDA Array）可以使用 cudaBindTextureToArray()。下边是一些示例代码，分别演示如何进行纹理绑定：

线性内存绑定到二维纹理参考

// Declare the 2D texture reference (texture references live at file scope)
texture<float, cudaTextureType2D,cudaReadModeElementType> texRef;

// Allocate pitched linear device memory
float *dev_ptr;
size_t pitch; // row stride in bytes, filled in by cudaMallocPitch for alignment
// Both out-parameters must be passed by address: the runtime writes the
// device pointer and the pitch back through them.
cudaMallocPitch((void**)&dev_ptr, &pitch, width*sizeof(float), height); // copy data with cudaMemcpy2D, honouring pitch

// Bind the pitched memory to the texture reference
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
size_t offset;
cudaBindTexture2D(&offset, texRef, dev_ptr, channelDesc, width, height, pitch);

二维内存绑定到二维纹理参考

// Declare the 2D texture reference (texture references live at file scope)
texture<float, cudaTextureType2D,cudaReadModeElementType> texRef;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaArray *dev_array;
// cudaMallocArray writes the new array handle through its first argument,
// so the address of dev_array must be passed.
// Fill the array with cudaMemcpyToArray / cudaMemcpy2DToArray, not plain cudaMemcpy.
cudaMallocArray(&dev_array, &channelDesc, width, height);

// Bind the CUDA array to the texture reference
cudaBindTextureToArray(texRef, dev_array);

###### 纹理参考使用
针对二维纹理参考，使用的主要API是tex2D，其定义如下：

// Texture-reference overload of tex2D: the first argument is the texture
// reference itself (not a cudaTextureObject_t, which belongs to the
// texture-object API).
// x, y are the texture coordinates of the fetch.
// The result has the texel type DataType when the reference was declared with
// cudaReadModeElementType; with cudaReadModeNormalizedFloat the fetch yields
// a floating-point value instead.
template<class DataType, enum cudaTextureReadMode readMode>
DataType tex2D(texture<DataType, cudaTextureType2D, readMode> texRef, float x, float y);

###### 纹理参考解绑
解绑相对而言较为简单，其API如下：

// Unbinds a texture reference; the only argument is the reference to unbind.
// T, dim and readMode are the template parameters the reference was declared with.
template<class T, int dim, enum cudaTextureReadMode readMode>
__host__ cudaError_t cudaUnbindTexture ( const texture < T, dim, readMode > & tex );

示例代码

#include <time.h>
#include <iostream>
using namespace std;

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <cmath>


// 2D float texture reference, read in element-type mode (texels come back as float)
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;

// Kernel: reads the 2D texture bound to texRef and writes it to a linear buffer,
// one thread per pixel.
//
// Launch layout: 2D grid of 2D blocks covering at least width x height threads;
//                surplus threads exit without touching memory.
// output : device buffer of width*height floats, written row-major
// width  : image width in pixels
// height : image height in pixels
// theta  : not used by this kernel (kept so the signature stays unchanged)
__global__ void transformKernel(float* output,
                                int width, int height,
                                float theta) 
{
    // Pixel coordinates handled by this thread (unnormalized)
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Valid pixels are [0, width) x [0, height). The comparison must be >=:
    // with '>' a thread at x == width would overwrite the first pixel of the
    // next row and a thread at y == height would write past the buffer.
    if (x >= width || y >= height)
        return;

    // Texture fetch.
    // The coordinates are shifted by 0.5 because CUDA samples at texel centres:
    // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#linear-filtering
    output[y * width + x] = tex2D(texRef, x + 0.5f, y + 0.5f);
}


// Aborts the program with a readable message if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Prints a row-major width x height float matrix, one row per line.
static void printMatrix(const float* data, int width, int height)
{
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            printf("%f ", data[y * width + x]);
        }
        printf("\n");
    }
}

// Demo: uploads a small ramp image into a CUDA array, binds it to texRef,
// reads it back through the texture unit and prints input and output.
int main()
{
    // Test data. The dimensions are const so the array bound below is a
    // constant expression rather than a non-standard variable-length array.
    const int width = 10;
    const int height = 10;
    const size_t size = width * height * sizeof(float);

    float h_data[width * height];
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            h_data[y * width + x] = (float)x;
        }
    }

    printMatrix(h_data, width, height);
    printf("\n");

    // Device storage: a CUDA array with one 32-bit float channel
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

    cudaArray* cuArray = NULL;
    checkCuda(cudaMallocArray(&cuArray, &channelDesc, width, height), "cudaMallocArray");

    // Upload the host image. cudaMemcpy2DToArray replaces the deprecated
    // cudaMemcpyToArray; the host rows are tightly packed, so the source
    // pitch equals the row width in bytes.
    checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, h_data,
                                  width * sizeof(float), width * sizeof(float), height,
                                  cudaMemcpyHostToDevice),
              "cudaMemcpy2DToArray");

    // Texture reference attributes: zero outside the image, bilinear
    // filtering, unnormalized (pixel) coordinates
    texRef.addressMode[0] = cudaAddressModeBorder;
    texRef.addressMode[1] = cudaAddressModeBorder;
    texRef.filterMode     = cudaFilterModeLinear;
    texRef.normalized     = 0;

    // Bind the array to the texture reference
    checkCuda(cudaBindTextureToArray(texRef, cuArray), "cudaBindTextureToArray");

    // Output buffer
    float* output = NULL;
    checkCuda(cudaMalloc(&output, size), "cudaMalloc");

    // Launch: ceil-divide so the grid covers the whole image
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    // The kernel takes four arguments; theta is unused there, so pass 0.
    transformKernel<<<dimGrid, dimBlock>>>(output, width, height, 0.0f);
    checkCuda(cudaGetLastError(), "transformKernel launch");
    checkCuda(cudaDeviceSynchronize(), "transformKernel execution");

    checkCuda(cudaMemcpy(h_data, output, size, cudaMemcpyDeviceToHost), "cudaMemcpy");

    printMatrix(h_data, width, height);

    // Release resources: unbind the texture before freeing its backing array
    checkCuda(cudaUnbindTexture(texRef), "cudaUnbindTexture");
    checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
    checkCuda(cudaFree(output), "cudaFree");

    return 0;
}