cuda 9.2 卸载出错

展开阅读全文

CUDA读取纹理内存出错

10-18

如题,这个问题很让人困惑,我使用CUDA数组和cudaMallocPitch分配的一维数组都不好用,都出现乱码。
我的kernel函数为:

[code=c]
texture<float, 2, cudaReadModeElementType> texSrc;
__inline__ __global__ void convolutionRows_kernel( float *d_Dst, int imageH, int imageW)
{
    int ix = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
    int iy = __mul24(blockIdx.y, blockDim.y) + threadIdx.y;
    while(ix < imageW && iy < imageH)
    {
        float fx = (float) ix + 0.5f;
        float fy = (float) iy + 0.5f;
        d_Dst[ix + __mul24(iy, imageW)] = tex2D(texSrc, fx, fy);
        ix += blockDim.x;
        iy += blockDim.y;
    }
}
[/code]

#1-- CUDA数组的使用是:
#1.1--下面的代码是绑定CUDA数组到texture及函数调用
[code=c]
static void convolutionRowsGPU(float* d_DstData, cudaArray* d_srcDataArray, cudaChannelFormatDesc floatTex, unsigned int imageHeight, unsigned int imageWidth)
{
    dim3 myBlock(16, 12);
    dim3 myGrid((imageWidth+(blockDim.x -1))/blockDim.x, (imageHeight +(blockDim.y -1))/ blockDim.y );
    //#1-- bind array to texture
    CUDA_SAFE_CALL( cudaBindTextureToArray(texSrc, d_srcDataArray, floatTex));
    //#2 -- call kernel function for row convolution
    convolutionRows_kernel <<< myGrid , myBlock >>>(d_DstData, imageHeight, imageWidth);
    cudaGetLastErrorWrapper("convolutionRows_kernel");
    //#3--unbind texture
    CUDA_SAFE_CALL(cudaUnbindTexture(texSrc));
}
[/code]

#1.2--下面是CUDA数组相关的纹理使用
[code=c]
unsigned int imageHeight = 640;
unsigned int imageWidth = 480;
float* h_sorce2DImage_f = new float[ imageHeight * imageWidth ];
//分配CUDA数组并赋值
cudaArray* d_fDataArray;
cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>();
cudaMallocArrayWrapper(&d_fDataArray, & floatTex, imageWidth, imageHeight);
//d_sorce2DImage_f 是定义的device端变量,大小为imageHeight * imageWidth *sizeof(float)
cudaMemcpyToArrayWrapper(d_fDataArray, 0, 0, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToDevice);

//测试1,测试CUDA数组内容
cudaError_t myError = cudaMemcpy2DFromArray(h_sorce2DImage_f, imageWidth, d_fDataArray, 0, 0, imageWidth, imageHeight, cudaMemcpyDeviceToHost);
WiteDataToFile("filted1.txt", h_sorce2DImage_f, imageWidth, imageHeight);

//调用函数,主要是读取纹理内容
convolutionRowsGPU (d_sorce2DImage_f, d_fDataArray,floatTex, imageHeight, imageWidth);
//测试2,测试从纹理中读取的内容
cudaMemcpyWrapper(h_sorce2DImage_f, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToHost);
WiteDataToFile("filted2.txt", h_sorce2DImage_f, imageWidth, imageHeight);

delete [] h_sorce2DImage_f ;
[/code]

注:其中函数cudaMallocArrayWrapper(),cudaMemcpyToArrayWrapper(),cudaMemcpyWrapper()只是为了方便进行了封装,不过只是在调用对应CUDA的函数后加上宏 CUDA_SAFE_CALL ()而已。

#2--绑定cudaMallocPitch分配的线性内存到纹理内存的情况
这里面的原因是cudaMalloc分配的pitch不是2的倍数,在绑定内存时会出错。

#2.1--下面的代码是绑定cudaMallocPitch分配的线性内存到纹理内存及相关调用

[code=c]
static void convolutionRowsGPU(float* d_DstData, const float* d_bindLinearMem, unsigned int imageHeight, unsigned int imageWidth, size_t pitch)
{
    dim3 myBlock(16, 12);
    dim3 myGrid((imageWidth+(blockDim.x -1))/blockDim.x, (imageHeight +(blockDim.y -1))/ blockDim.y );
    size_t offset;
    //绑定纹理内存
    cudaChannelFormatDesc chennelDesc = cudaCreateChannelDesc<float>();
    cudaBindTexture2D(&offset, texSrc, d_bindLinearMem, chennelDesc, imageWidth, imageHeight, pitch);
    //调用核函数
    convolutionRows_kernel <<< myGrid, myBlock >>>(d_DstData, imageHeight, imageWidth);
    cudaGetLastErrorWrapper("convolutionRows_kernel");
    //解绑定
    CUDA_SAFE_CALL(cudaUnbindTexture(texSrc));
}
[/code]

#2.2--下面是 cudaMallocPitch分配的线性内存相关的纹理使用
[code=c]
unsigned int imageHeight = 640;
unsigned int imageWidth = 480;
float* h_sorce2DImage_f = new float[ imageHeight * imageWidth ];

//cudaMallocPitchWrapper分配内存,并赋值
float* d_linearGloabalMem;
size_t pitch;
cudaMallocPitchWrapper(((void**)& d_linearGloabalMem), &pitch, imageWidth * sizeof(float), imageHeight);
myError = cudaMemcpy2D(d_linearGloabalMem, pitch, d_sorce2DImage_f, imageWidth * sizeof(float), imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToDevice);

//测试3,测试cudaMallocPitch()分配的内存内容
myError = cudaMemcpy2D(h_sorce2DImage_f, imageWidth * sizeof(float), d_linearGloabalMem, pitch, imageWidth * sizeof(float), imageHeight, cudaMemcpyDeviceToHost);
WiteDataToFile("filted3.txt", h_sorce2DImage_f, imageWidth, imageHeight);

//调用函数,主要是读取纹理内容
convolutionRowsGPU (d_sorce2DImage_f, d_linearGloabalMem, imageHeight, imageWidth, pitch);

//测试4,测试从纹理中读取的内容
cudaMemcpyWrapper(h_sorce2DImage_f, d_sorce2DImage_f, imageSize * sizeof(float), cudaMemcpyDeviceToHost);
WiteDataToFile("filted4.txt", h_sorce2DImage_f, imageWidth, imageHeight);
delete [] h_sorce2DImage_f ;
[/code]

很令人感到意外的是测试1 和测试3都有正确的结果,但是测试2 和测试4都是乱码,这个我确实不解,有哪个大牛帮忙分析下。

论坛

没有更多推荐了,返回首页