下面是对 CUDA 纹理对象(texture object)的实验,代码改自《CUDA C Programming Guide》(PG-02829-001_v10.0,2018 年 10 月)。由于内容更贴近计算机图形学,故归入图形学分类。
©版权所有!
/*
下面是对CUDA的texture object的实验,实验环境VS2017,CUDA 10,GTX 1060。
作者:吕翔宇,部分代码改自CUDA手册
E-mail:630056108@qq.com
2019.3.2 17:32
——————
一次修正:补释放h_data,out防止内存泄漏
by吕翔宇
2019.3.3 23:44
*/
#include <amp_graphics.h>

#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdio.h>

#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Simple texture-coordinate transformation example.
//
// Each thread handles one output element (x, y) and samples the texture twice:
// once at the element's own normalized coordinate (x / width, y / height), and
// once at that coordinate rotated by `theta` about the texture centre (0.5, 0.5).
//
// Launch layout: 2D grid of 2D blocks, one thread per element. The grid may
// overhang the width x height domain; threads outside it return without writing.
//
// Parameters:
//   output1 - device buffer receiving the samples at the unrotated coordinates,
//             indexed as y * width + x (so it must hold width * height floats).
//   output2 - device buffer receiving the samples at the rotated coordinates,
//             same layout as output1.
//   texObj  - texture object to sample; main() in this file creates it with
//             normalized coordinates, which is what the u/v math here produces.
//   width, height - extent of the output domain in elements.
//   theta   - rotation angle in radians.
//
// NOTE(review): x / width addresses the edge of texel x, not its centre, so a
// linearly filtered texture returns a blend of neighbouring texels. This follows
// the CUDA C Programming Guide sample and is left unchanged.
__global__ void transformKernel(float *output1, float *output2, cudaTextureObject_t texObj, int width, int height, float theta) {
    // Global element coordinates of this thread.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Grids rarely divide the data evenly: skip threads that fall outside the
    // domain instead of writing past the end of the output buffers.
    if (width <= 0 || height <= 0 || x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized texture coordinates of this element.
    const float u = x / (float)width;
    const float v = y / (float)height;

    // Rotate about the texture centre: shift to the origin, rotate, shift back.
    const float cu = u - 0.5f;
    const float cv = v - 0.5f;
    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);
    const float tu = cu * cosTheta - cv * sinTheta + 0.5f;
    const float tv = cv * cosTheta + cu * sinTheta + 0.5f;

    // Widen before multiplying so the flat index cannot wrap for large images.
    const size_t idx = (size_t)y * (size_t)width + x;

    // Read from the texture and write to global memory.
    // The untransformed sample uses the original (u, v), not the centred
    // (u - 0.5, v - 0.5) pair, which would shift the image by half a texture.
    output1[idx] = tex2D<float>(texObj, u, v);   // untransformed
    output2[idx] = tex2D<float>(texObj, tu, tv); // rotated
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
// `what` names the operation for the error message.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Prints a row-major height x width float matrix to std::cout,
// tab-separated, one row per line.
static void printMatrix(const float *data, int width, int height)
{
    for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
            std::cout << data[row * width + col] << "\t";
        }
        std::cout << "\n";
    }
}

// Texture-object experiment: fills a 16x16 float image with values in [1, 10],
// uploads it to a CUDA array, binds a linearly filtered, wrap-addressed texture
// object with normalized coordinates to it, runs transformKernel with a rotation
// of asin(0.5) radians, and prints the input and both kernel outputs.
// Returns 0 on success; exits with EXIT_FAILURE if any allocation or CUDA call fails.
int main()
{
    const int width = 16, height = 16;
    const size_t rowBytes = width * sizeof(float);
    const size_t size = rowBytes * height;

    // Host buffers: the input image and a staging buffer for results.
    float *h_data = (float*)malloc(size);
    float *out = (float*)malloc(size);
    if (h_data == NULL || out == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", size);
        free(h_data);
        free(out);
        return EXIT_FAILURE;
    }

    // Fill from the last element down (same rand() consumption order as before).
    for (int i = width * height - 1; i > -1; i--)
        h_data[i] = 1.0f + (float)(rand() % 10);

    std::cout << "纹理转换前内容:\n";
    printMatrix(h_data, width, height);

    // Allocate the CUDA array on the device (single 32-bit float channel).
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cuArray = NULL;
    checkCuda(cudaMallocArray(&cuArray, &channelDesc, width, height), "cudaMallocArray");

    // Copy the host image into the CUDA array. cudaMemcpyToArray is deprecated
    // in newer toolkits; cudaMemcpy2DToArray is the supported equivalent
    // (source pitch and row width are both given in bytes).
    checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, h_data, rowBytes, rowBytes, height,
                                  cudaMemcpyHostToDevice),
              "cudaMemcpy2DToArray");

    // Describe the resource backing the texture.
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // Texture sampling parameters.
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeWrap;
    texDesc.addressMode[1] = cudaAddressModeWrap;
    texDesc.filterMode = cudaFilterModeLinear;
    texDesc.readMode = cudaReadModeElementType;
    texDesc.normalizedCoords = 1;

    // Create the texture object.
    cudaTextureObject_t texObj = 0;
    checkCuda(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL), "cudaCreateTextureObject");

    // Device buffers that receive the kernel results.
    float *output1 = NULL;
    float *output2 = NULL;
    checkCuda(cudaMalloc(&output1, size), "cudaMalloc(output1)");
    checkCuda(cudaMalloc(&output2, size), "cudaMalloc(output2)");

    // Launch the kernel: one thread per element, grid rounded up to cover the image.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y);
    const float angle = asinf(0.5f);
    transformKernel<<<dimGrid, dimBlock>>>(output1, output2, texObj, width, height, angle);
    // A launch does not return a status; bad launch configurations show up here.
    checkCuda(cudaGetLastError(), "transformKernel launch");

    // Print the results. The blocking cudaMemcpy waits for the kernel, so
    // errors raised while it executes are reported by this call.
    checkCuda(cudaMemcpy(out, output1, size, cudaMemcpyDeviceToHost), "cudaMemcpy(output1)");
    std::cout << "未变换坐标纹理转换后的结果:\n";
    printMatrix(out, width, height);

    checkCuda(cudaMemcpy(out, output2, size, cudaMemcpyDeviceToHost), "cudaMemcpy(output2)");
    std::cout << "变换坐标纹理转换后的结果:\n";
    printMatrix(out, width, height);

    // Destroy the texture object.
    checkCuda(cudaDestroyTextureObject(texObj), "cudaDestroyTextureObject");
    // Release device memory.
    checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
    checkCuda(cudaFree(output1), "cudaFree(output1)");
    checkCuda(cudaFree(output2), "cudaFree(output2)");
    // Release host memory.
    free(h_data);
    free(out);
    system("pause");
    return 0;
}
结果