下面是对Texture Reference的实验,代码改自 CUDA C PROGRAMMING GUIDE (PG-02829-001_v10.0 | October 2018) p54,因为这里比较贴近计算机图形学,故移到图形学中去。
©版权所有!
/*
下面是对CUDA的Texture Reference的实验,实验环境VS2017,CUDA 10,GTX 1060。
作者:吕翔宇,部分代码改自CUDA手册
E-mail:630056108@qq.com
2019.3.2 20:44
——————
一次修正:补释放h_data,out防止内存泄漏
by吕翔宇
2019.3.3 23:44
*/
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <cstdlib>
#include<iostream>
#include<cmath>
#define __CUDACC__
#define __cplusplus
#include<texture_fetch_functions.h>
#include<cuda.h>
#include<cuda_texture_types.h>
// File-scope texture reference: 2D, float texels, fetched as raw element values
// (cudaReadModeElementType). main() sets its addressing/filtering attributes and
// binds it to a CUDA array before transformKernel samples it.
texture<float, cudaTextureType2D, cudaReadModeElementType>texRef;
// Simple texture-coordinate transformation example.
//
// Each thread handles one pixel (x, y) and samples the texture bound to texRef twice:
//   output1[y*width + x] - fetch at the pixel's own coordinate (x/width, y/height)
//   output2[y*width + x] - fetch at that coordinate rotated by theta about (0.5, 0.5)
//
// Parameters:
//   output1, output2 - device buffers, each with room for width*height floats
//   width, height    - image extent in pixels
//   theta            - rotation angle in radians
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may overhang
// the image; threads outside width x height return without touching memory.
// The coordinates passed to tex2D are x/width and y/height, so the texture reference
// is expected to use normalized addressing (main() sets texRef.normalized = true).
__global__ void transformKernel(float *output1, float *output2, int width, int height, float theta) {
    // Pixel handled by this thread.
    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

    // Guard the grid tail: a ceil-div launch creates threads past the image edge,
    // and those must not write into output1/output2.
    if (width <= 0 || height <= 0 || x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Texture coordinate of this pixel.
    const float u = x / (float)width;
    const float v = y / (float)height;

    // Rotate about the texture centre. (u, v) itself is left untouched so the
    // unrotated fetch below really uses the pixel's own coordinate; with theta == 0
    // both fetches then hit the same location.
    const float cu = u - 0.5f;
    const float cv = v - 0.5f;
    const float c = cosf(theta);
    const float s = sinf(theta);
    const float tu = cu * c - cv * s + 0.5f;
    const float tv = cv * c + cu * s + 0.5f;

    // Read from the texture and write to global memory (row-major).
    const size_t idx = (size_t)y * (size_t)width + x;
    output1[idx] = tex2D<float>(texRef, u, v);   // untransformed coordinate
    output2[idx] = tex2D<float>(texRef, tu, tv); // rotated coordinate
}
// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char *expr, const char *file, int line) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error at %s:%d: %s -> %s\n", file, line, expr, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}
#define CUDA_CHECK(call) checkCuda((call), #call, __FILE__, __LINE__)

// Prints a row-major height x width float matrix to stdout, tab separated, one row per line.
static void printMatrix(const float *data, int width, int height) {
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            std::cout << data[i*width + j] << "\t";
        }
        std::cout << "\n";
    }
}

// Texture-reference experiment: fills a 16x16 float image with pseudo-random values
// in [1, 10], uploads it to a CUDA array bound to texRef, runs transformKernel and
// prints the input image followed by the two sampled images.
// Returns 0 on success; exits with EXIT_FAILURE if a CUDA call or a host allocation fails.
int main()
{
    const int width = 16, height = 16;
    const size_t size = (size_t)width * height * sizeof(float);

    // Host buffers: source image and a staging buffer for results.
    float *h_data = (float*)malloc(size);
    float *out = (float*)malloc(size);
    if (h_data == NULL || out == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)size);
        free(h_data);
        free(out);
        return EXIT_FAILURE;
    }

    // Allocate the CUDA array on the device (single 32-bit float channel).
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cuArray = NULL;
    CUDA_CHECK(cudaMallocArray(&cuArray, &channelDesc, width, height));

    // Fill back-to-front so the rand() sequence lands on the same elements as before.
    for (int i = width * height - 1; i > -1; i--)
        h_data[i] = 1.0f + (float)(rand() % 10);
    std::cout << "纹理转换前内容:\n";
    printMatrix(h_data, width, height);

    // Copy the host image into the CUDA array. The 2D copy replaces the deprecated
    // cudaMemcpyToArray; rows are tightly packed, so pitch == row width in bytes.
    CUDA_CHECK(cudaMemcpy2DToArray(cuArray, 0, 0, h_data,
                                   width * sizeof(float), width * sizeof(float), height,
                                   cudaMemcpyHostToDevice));

    // Configure the texture reference: wrap addressing in both dimensions,
    // linear filtering, normalized coordinates.
    texRef.addressMode[0] = cudaAddressModeWrap;
    texRef.addressMode[1] = cudaAddressModeWrap;
    texRef.filterMode = cudaFilterModeLinear;
    texRef.normalized = true;

    // Bind the CUDA array to the texture reference.
    CUDA_CHECK(cudaBindTextureToArray(&texRef, cuArray, &channelDesc));

    // Device buffers receiving the kernel results.
    float *output1 = NULL;
    float *output2 = NULL;
    CUDA_CHECK(cudaMalloc(&output1, size));
    CUDA_CHECK(cudaMalloc(&output2, size));

    // Launch the kernel: 16x16 threads per block, enough blocks to cover the image.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y);
    float angle = asinf(0.5f);
    transformKernel <<<dimGrid, dimBlock >>> (output1, output2, width, height, angle);
    CUDA_CHECK(cudaGetLastError());      // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize()); // errors raised while the kernel ran

    // Print the results.
    CUDA_CHECK(cudaMemcpy(out, output1, size, cudaMemcpyDeviceToHost));
    std::cout << "未变换坐标纹理转换后的结果:\n";
    printMatrix(out, width, height);

    CUDA_CHECK(cudaMemcpy(out, output2, size, cudaMemcpyDeviceToHost));
    std::cout << "变换坐标纹理转换后的结果:\n";
    printMatrix(out, width, height);

    // Release resources: unbind the texture before freeing the array that backs it.
    CUDA_CHECK(cudaUnbindTexture(&texRef));
    CUDA_CHECK(cudaFreeArray(cuArray));
    CUDA_CHECK(cudaFree(output1));
    CUDA_CHECK(cudaFree(output2));
    free(h_data);
    free(out);
    system("pause");
    return 0;
}
结果
效果与纹理对象相同