CUDA 图形学 Texture Reference 实验

最新推荐文章于 2024-08-23 23:10:33 发布

lvxiangyu11

最新推荐文章于 2024-08-23 23:10:33 发布

阅读量551

点赞数

分类专栏：计算机图形学 CUDA 并行计算

本文链接：https://blog.csdn.net/lvxiangyu11/article/details/88081100

版权

并行计算同时被 3 个专栏收录

9 篇文章 1 订阅

订阅专栏

CUDA

6 篇文章 0 订阅

订阅专栏

计算机图形学

4 篇文章 0 订阅

订阅专栏

下面是对Texture Reference的实验，代码改自 CUDA C PROGRAMMING GUIDE (PG-02829-001_v10.0 | October 2018) p54，因为这里比较贴近计算机图形学，故移到图形学中去。

/*
	下面是对CUDA的Texture Reference的实验，实验环境VS2017,CUDA 10，GTX 1060。
	作者：吕翔宇，部分代码改自CUDA手册
	E-mail:630056108@qq.com
	2019.3.2 20.44
	——————
	一次修正：补释放h_data,out防止内存泄漏
	by吕翔宇
	2019.3.3 23:44
*/


#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>
// NOTE(review): the two defines below look like an IDE/IntelliSense workaround.
// Standard headers are included above them on purpose: once __cplusplus is
// redefined as empty, any later header testing `#if __cplusplus >= ...` can fail.
#define __CUDACC__
#define __cplusplus
#include<texture_fetch_functions.h>
#include<cuda.h>
#include<cuda_texture_types.h>

// File-scope 2D texture reference with float texels, fetched without type
// conversion (cudaReadModeElementType). main() sets its addressing/filter
// attributes and binds it to a CUDA array before transformKernel samples it.
// NOTE(review): the texture reference API is legacy (deprecated in later CUDA
// toolkits in favour of texture objects); this file targets CUDA 10.
texture<float, cudaTextureType2D, cudaReadModeElementType>texRef;
// Simple texture-transform example.
//
// Each thread handles one pixel (x, y) of a width x height image and samples the
// file-scope texture reference `texRef` twice using normalized coordinates:
//   output1[y*width + x] - texture sampled at the pixel's own coordinates (u, v)
//   output2[y*width + x] - texture sampled at (u, v) rotated by `theta` radians
//                          about the texture centre (0.5, 0.5)
//
// Launch layout: a 2D grid of 2D blocks that covers at least width x height
// threads; threads that fall outside the image return without writing.
// Preconditions: `texRef` is bound by the host before launch, and output1 /
// output2 each hold at least width*height floats in device memory.
// Note: coordinates are x/width and y/height (as in the CUDA programming guide
// sample), i.e. texel corners rather than texel centres, so with linear
// filtering even output1 is a blend of neighbouring texels.
__global__ void transformKernel(float *output1, float *output2, int width, int height, float theta) {
	// Global pixel coordinates of this thread.
	unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
	unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	// The grid is rounded up to whole blocks, so it may overshoot the image;
	// without this guard the extra threads would write out of bounds.
	if (width <= 0 || height <= 0 || x >= (unsigned int)width || y >= (unsigned int)height)
		return;

	// Normalized texture coordinates of this pixel.
	float u = x / (float)width;
	float v = y / (float)height;

	// Rotate about the texture centre: shift to the centre, rotate, shift back.
	// The centred copies are kept separate so (u, v) stay untouched for the
	// untransformed fetch below.
	float cu = u - 0.5f;
	float cv = v - 0.5f;
	float c = cosf(theta);
	float s = sinf(theta);
	float tu = cu * c - cv * s + 0.5f;
	float tv = cv * c + cu * s + 0.5f;

	// Read from the texture and write the results to global memory.
	unsigned int idx = y * width + x;
	output1[idx] = tex2D<float>(texRef, u, v);   // untransformed coordinates
	output2[idx] = tex2D<float>(texRef, tu, tv); // rotated coordinates
}

// Prints a diagnostic and terminates the process if a CUDA runtime call failed.
// `what` names the call or step being checked, for the error message only.
static void checkCuda(cudaError_t status, const char *what)
{
	if (status != cudaSuccess) {
		fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
		exit(EXIT_FAILURE);
	}
}

// Prints a row-major width x height float matrix, tab-separated, one row per line.
static void printMatrix(const float *data, int width, int height)
{
	for (int i = 0; i < height; i++) {
		for (int j = 0; j < width; j++) {
			std::cout << data[i*width + j] << "\t";
		}
		std::cout << "\n";
	}
}

// Texture-reference experiment: fills a 16x16 float image with pseudo-random
// values in [1, 10], uploads it to a CUDA array bound to `texRef`, runs
// transformKernel with a rotation of asin(0.5) radians, and prints the input
// plus both kernel outputs. Every CUDA call is checked; on failure the program
// prints the error and exits with EXIT_FAILURE. Returns 0 on success.
int main()
{
	const int width = 16, height = 16;
	const size_t rowBytes = width * sizeof(float);
	const size_t size = rowBytes * height;

	// Allocate the CUDA array on the device (one 32-bit float channel).
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
	cudaArray *cuArray = NULL;
	checkCuda(cudaMallocArray(&cuArray, &channelDesc, width, height), "cudaMallocArray");

	// Host-side source image; filled back-to-front so the rand() sequence maps
	// to the same elements as before.
	std::vector<float> h_data(width * height);
	for (int i = width * height - 1; i > -1; i--)
		h_data[i] = 1.0f + (float)(rand() % 10);
	std::cout << "纹理转换前内容：\n";
	printMatrix(h_data.data(), width, height);

	// Copy the host image into the device CUDA array. cudaMemcpy2DToArray is
	// used because cudaMemcpyToArray is deprecated; host rows are tightly packed,
	// so the source pitch equals the row width in bytes.
	checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, h_data.data(), rowBytes, rowBytes, height,
		cudaMemcpyHostToDevice), "cudaMemcpy2DToArray");

	// Configure the texture reference: wrap addressing on both axes, linear
	// filtering, normalized [0, 1) coordinates.
	texRef.addressMode[0] = cudaAddressModeWrap;
	texRef.addressMode[1] = cudaAddressModeWrap;
	texRef.filterMode = cudaFilterModeLinear;
	texRef.normalized = true;

	// Bind the CUDA array to the texture reference.
	checkCuda(cudaBindTextureToArray(&texRef, cuArray, &channelDesc), "cudaBindTextureToArray");

	// Device buffers that receive the kernel results.
	float *output1 = NULL;
	checkCuda(cudaMalloc(&output1, size), "cudaMalloc(output1)");
	float *output2 = NULL;
	checkCuda(cudaMalloc(&output2, size), "cudaMalloc(output2)");

	// Launch the kernel on a grid rounded up to whole 16x16 blocks.
	dim3 dimBlock(16, 16);
	dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y);
	float angle = asinf(0.5f);
	transformKernel <<<dimGrid, dimBlock >>> (output1, output2, width, height, angle);
	// A launch does not return a status; a bad configuration is reported here.
	checkCuda(cudaGetLastError(), "transformKernel launch");

	// Fetch and print the results. The blocking cudaMemcpy also waits for the
	// kernel and reports any error raised while it ran.
	std::vector<float> out(width * height);
	checkCuda(cudaMemcpy(out.data(), output1, size, cudaMemcpyDeviceToHost), "cudaMemcpy(output1)");
	std::cout << "未变换坐标纹理转换后的结果：\n";
	printMatrix(out.data(), width, height);
	checkCuda(cudaMemcpy(out.data(), output2, size, cudaMemcpyDeviceToHost), "cudaMemcpy(output2)");
	std::cout << "变换坐标纹理转换后的结果：\n";
	printMatrix(out.data(), width, height);

	// Release device resources; unbind the texture before freeing its array.
	// Host buffers are released by std::vector.
	checkCuda(cudaUnbindTexture(&texRef), "cudaUnbindTexture");
	checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
	checkCuda(cudaFree(output1), "cudaFree(output1)");
	checkCuda(cudaFree(output2), "cudaFree(output2)");

	system("pause");
	return 0;
}

结果