CUDA 无法识别texture
刚开始学习CUDA的纹理内存,从网上找了学习资料,但是测试的时候,程序却提示有错误:
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
output[y*width + x] = tex2D(texRef, tu, tv);
无法识别texture,tex2D
当时的第一思路,就是去找 texture 的定义。查找后发现它并不是函数,而是在 cuda_texture_types.h 文件中定义的一个结构体模板,定义如下:
// Texture reference type template (quoted from cuda_texture_types.h).
// T is the type used to derive the channel descriptor, texType selects the
// texture type (default cudaTextureType1D) and mode selects the read mode
// (default cudaReadModeElementType). The fields assigned below are inherited
// from textureReference.
template<class T, int texType = cudaTextureType1D, enum cudaTextureReadMode mode = cudaReadModeElementType>
struct __device_builtin_texture_type__ texture : public textureReference
{
// Both constructors are compiled only when __CUDACC_RTC__ is not defined.
#if !defined(__CUDACC_RTC__)
// Host-side constructor with defaults: non-normalized coordinates (norm = 0),
// point filtering and clamp addressing. The channel descriptor is derived
// from T via cudaCreateChannelDesc<T>().
__host__ texture(int norm = 0,
enum cudaTextureFilterMode fMode = cudaFilterModePoint,
enum cudaTextureAddressMode aMode = cudaAddressModeClamp)
{
normalized = norm;
filterMode = fMode;
// The same address mode is applied to all three addressMode slots.
addressMode[0] = aMode;
addressMode[1] = aMode;
addressMode[2] = aMode;
channelDesc = cudaCreateChannelDesc<T>();
sRGB = 0;
}
// Host-side constructor that takes an explicit channel descriptor instead of
// deriving one from T; every other field is set exactly as above.
__host__ texture(int norm,
enum cudaTextureFilterMode fMode,
enum cudaTextureAddressMode aMode,
struct cudaChannelFormatDesc desc)
{
normalized = norm;
filterMode = fMode;
// The same address mode is applied to all three addressMode slots.
addressMode[0] = aMode;
addressMode[1] = aMode;
addressMode[2] = aMode;
channelDesc = desc;
sRGB = 0;
}
#endif /* !__CUDACC_RTC__ */
};
于是我在项目中包含了这个头文件,结果还是不行,仍然提示同样的错误。
经过各种查找和咨询,网上都无法找到解决方案,最后多谢朋友的帮忙,解决了这个问题。
现在将解决方案记录如下:应该包含 cpu_anim.h 这个头文件,即 #include "cpu_anim.h"。
这个文件我也上传到了资源,需要的朋友可以自行下载。
下面附上一个从别处寻来的纹理的使用例子,拿走不谢
#include"cuda_runtime.h"
#include"device_launch_parameters.h"
#include<stdio.h>
#include<stdlib.h>
#include "cuda_texture_types.h"
#include<math.h>
#include "cuda.h"
#include "cpu_anim.h" // author's note: this header must be included when using texture. NOTE(review): cpu_anim.h is not shown here - confirm why it is needed
// NOTE(review): `size` is not referenced anywhere in the visible code, and as a
// macro it will rewrite any later identifier spelled `size` - confirm it is needed.
#define size 256
// File-scope 2D texture reference with float elements and element-type reads.
// main() configures it and binds it to a CUDA array; transformKernel samples
// it through tex2D.
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;
// Samples texRef at coordinates rotated by theta around the texture centre
// (0.5, 0.5) and stores one sample per output element.
//
// Launch layout: a 2D grid of 2D blocks; thread (x, y) produces
// output[y * width + x]. The grid may be larger than the image - threads that
// fall outside width x height return without touching memory.
//
// output : device buffer holding at least width * height floats.
// width  : number of columns in the output.
// height : number of rows in the output.
// theta  : rotation angle in radians (it is passed straight to cosf/sinf).
//
// The coordinates handed to tex2D are fractions of width/height, so the bound
// texture is expected to use normalized coordinates.
__global__ void transformKernel(float* output, int width, int height, float theta)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The launch rounds the grid up to whole blocks, so threads past the image
    // edge must not write: they would land outside the output buffer.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
    {
        return;
    }

    // Normalized coordinates, shifted so the rotation pivots on the centre.
    const float u = x / (float)width - 0.5f;
    const float v = y / (float)height - 0.5f;

    // Single-precision trig throughout; evaluated once and reused.
    const float cosTheta = cosf(theta);
    const float sinTheta = sinf(theta);

    // Rotate, then shift back into texture space.
    const float tu = u * cosTheta - v * sinTheta + 0.5f;
    const float tv = v * cosTheta + u * sinTheta + 0.5f;

    output[y * width + x] = tex2D(texRef, tu, tv);
}
// Prints a diagnostic naming the failed call and terminates the process if a
// CUDA runtime call did not return cudaSuccess.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Demo driver: fills a 25x25 float image with its own linear indices, uploads
// it to a CUDA array, binds the array to texRef, runs transformKernel to sample
// a rotated copy, prints the result row by row and releases every resource.
// Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
int main()
{
    const int width = 25, height = 25;
    const size_t rowBytes = width * sizeof(float);
    const size_t imageBytes = rowBytes * height;

    // Host buffers: input image and the result read back from the device.
    float* h_data = (float*)malloc(imageBytes);
    float* hostPtr = (float*)malloc(imageBytes);
    if (h_data == NULL || hostPtr == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(h_data);
        free(hostPtr);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < height; ++i)
    {
        for (int j = 0; j < width; ++j)
        {
            h_data[i * width + j] = (float)(i * width + j);
        }
    }

    // One 32-bit float channel per texel.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray = NULL;
    checkCuda(cudaMallocArray(&cuArray, &channelDesc, width, height), "cudaMallocArray");

    // cudaMemcpyToArray is deprecated; the 2D variant takes the source pitch
    // and the row width in bytes plus the number of rows.
    checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, h_data, rowBytes, rowBytes, height,
                                  cudaMemcpyHostToDevice),
              "cudaMemcpy2DToArray");

    // Wrap addressing, linear filtering and normalized coordinates.
    texRef.addressMode[0] = cudaAddressModeWrap;
    texRef.addressMode[1] = cudaAddressModeWrap;
    texRef.filterMode = cudaFilterModeLinear;
    texRef.normalized = true;
    checkCuda(cudaBindTextureToArray(texRef, cuArray, channelDesc), "cudaBindTextureToArray");

    float* output = NULL;
    checkCuda(cudaMalloc(&output, imageBytes), "cudaMalloc");

    // Ceil-divide so the grid covers the whole image.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y);

    // NOTE(review): the kernel feeds this value to cosf/sinf, i.e. it is
    // interpreted as radians. If 30 degrees was intended, convert first.
    float angle = 30;
    transformKernel<<<dimGrid, dimBlock>>>(output, width, height, angle);
    // A launch does not return a status itself; query it explicitly.
    checkCuda(cudaGetLastError(), "transformKernel launch");

    // Blocking copy: also reports errors raised while the kernel executed.
    checkCuda(cudaMemcpy(hostPtr, output, imageBytes, cudaMemcpyDeviceToHost), "cudaMemcpy");

    for (int i = 0; i < height; ++i)
    {
        for (int j = 0; j < width; ++j)
        {
            // Trailing space keeps adjacent values from running together.
            printf("%f ", hostPtr[i * width + j]);
        }
        printf("\n");
    }

    checkCuda(cudaUnbindTexture(texRef), "cudaUnbindTexture");
    checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
    checkCuda(cudaFree(output), "cudaFree");
    free(hostPtr);
    free(h_data);

#ifdef _WIN32
    // Keeps the console window open when the program is started from the IDE.
    system("pause");
#endif
    return 0;
}