下面是对 Surface Object 的实验,代码改自 CUDA C Programming Guide (PG-02829-001_v10.0 | October 2018) 第 54 页;因为这部分内容比较贴近计算机图形学,故归入图形学分类。
©版权所有!
/*
下面是对CUDA的surface object的实验,实验环境VS2017,CUDA 10,GTX 1060。
作者:吕翔宇,部分代码改自CUDA手册
E-mail:630056108@qq.com
2019.3.4 0:11
*/
#define __cplusplus
#define __CUDACC__
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Surface-to-surface copy kernel: each thread copies one uchar4 texel from
// inputSurfObj to the same coordinates in outputSurfObj.
//
// Launch layout: a 2D grid of 2D blocks in which thread (x, y) handles texel
// (x, y); threads that fall outside the width x height region do nothing.
//
// Parameters:
//   inputSurfObj  - surface object that is read from
//   outputSurfObj - surface object that is written to
//   width, height - extent of the region to copy, in texels
//
// (VS2017 IntelliSense may flag __global__ here because of the manual
// __CUDACC__ define at the top of the file; the code still compiles.)
__global__ void copyKernel(cudaSurfaceObject_t inputSurfObj,
                           cudaSurfaceObject_t outputSurfObj,
                           int width, int height) {
    // Texel coordinates handled by this thread.
    const unsigned int col = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Guard: the grid may be larger than the region being copied.
    if (col >= width || row >= height) {
        return;
    }

    // The x coordinate handed to surf2Dread/surf2Dwrite is scaled by 4,
    // the size in bytes of one uchar4 texel.
    const unsigned int byteOffsetX = col * 4;

    uchar4 texel;
    surf2Dread(&texel, inputSurfObj, byteOffsetX, row);
    surf2Dwrite(texel, outputSurfObj, byteOffsetX, row);
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
//   status - value returned by the CUDA runtime call
//   what   - short description of the call, used in the error message
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        std::cerr << "CUDA error in " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Surface-object demo: fills a 16x16 host buffer with random values, uploads
// it into a CUDA array, copies it to a second CUDA array through surface
// objects using copyKernel, downloads the result and prints both buffers.
// Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
int main() {
    std::ios::sync_with_stdio(false);  // author's habit: unsync iostream from stdio

    // Experiment parameters. Each 32-bit host word maps onto one texel of the
    // CUDA arrays, whose format is four 8-bit unsigned channels (uchar4).
    const int width = 16;
    const int height = 16;
    const size_t rowBytes = sizeof(unsigned int) * width;
    const size_t size = rowBytes * height;

    unsigned int *h_data = static_cast<unsigned int *>(malloc(size));
    if (!h_data) {
        std::cerr << "host allocation of " << size << " bytes failed" << std::endl;
        return EXIT_FAILURE;
    }

    // Initialise and print the input data.
    std::cout << "初始数据:\n";
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            h_data[i * width + j] = rand() % 100;
            std::cout << h_data[i * width + j] << "\t";
        }
        std::cout << "\n";
    }

    // Allocate the input and output CUDA arrays with surface load/store enabled.
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
    cudaArray *cuInputArray = 0;
    checkCuda(cudaMallocArray(&cuInputArray, &channelDesc, width, height,
                              cudaArraySurfaceLoadStore),
              "cudaMallocArray(input)");
    cudaArray *cuOutputArray = 0;
    checkCuda(cudaMallocArray(&cuOutputArray, &channelDesc, width, height,
                              cudaArraySurfaceLoadStore),
              "cudaMallocArray(output)");

    // Upload the host data into the input array (row width is given in bytes).
    checkCuda(cudaMemcpy2DToArray(cuInputArray, 0, 0, h_data, rowBytes,
                                  rowBytes, height, cudaMemcpyHostToDevice),
              "cudaMemcpy2DToArray");

    // Create one surface object per CUDA array.
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;

    resDesc.res.array.array = cuInputArray;
    cudaSurfaceObject_t inputSurfObj = 0;
    checkCuda(cudaCreateSurfaceObject(&inputSurfObj, &resDesc),
              "cudaCreateSurfaceObject(input)");

    resDesc.res.array.array = cuOutputArray;
    cudaSurfaceObject_t outputSurfObj = 0;
    checkCuda(cudaCreateSurfaceObject(&outputSurfObj, &resDesc),
              "cudaCreateSurfaceObject(output)");

    // Launch configuration: 16x16 threads per block, ceil-divided grid.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    copyKernel<<<dimGrid, dimBlock>>>(inputSurfObj, outputSurfObj, width, height);
    checkCuda(cudaGetLastError(), "copyKernel launch");
    // The launch is asynchronous: wait for completion (and surface any
    // execution error) before tearing down the surface objects.
    checkCuda(cudaDeviceSynchronize(), "copyKernel execution");

    // Destroy the surface objects now that the kernel has finished.
    checkCuda(cudaDestroySurfaceObject(inputSurfObj),
              "cudaDestroySurfaceObject(input)");
    checkCuda(cudaDestroySurfaceObject(outputSurfObj),
              "cudaDestroySurfaceObject(output)");

    // Clear the host buffer so that the printed values can only come from the
    // device, then download the output array. A cudaArray is opaque and cannot
    // be passed to plain cudaMemcpy, hence the array-specific copy.
    memset(h_data, 0, size);
    checkCuda(cudaMemcpy2DFromArray(h_data, rowBytes, cuOutputArray, 0, 0,
                                    rowBytes, height, cudaMemcpyDeviceToHost),
              "cudaMemcpy2DFromArray");

    // Print the data that came back from the device, unmodified.
    std::cout << "处理后数据:\n";
    for (int i = 0; i < height; i++) {
        for (int j = 0; j < width; j++) {
            std::cout << h_data[i * width + j] << "\t";
        }
        std::cout << "\n";
    }

    // Release device and host memory.
    checkCuda(cudaFreeArray(cuInputArray), "cudaFreeArray(input)");
    checkCuda(cudaFreeArray(cuOutputArray), "cudaFreeArray(output)");
    free(h_data);

    std::cout.flush();
    system("pause");  // keeps the Windows console window open
    return 0;
}
结果
暂时没看懂surface api在干个啥,明天再看,太晚了