下面的代码分配了一个尺寸为 width×height 的二维浮点数组,同时演示了怎样在设备代码中遍历数组元素。
//Host code
// Array dimensions, counted in float elements.
int width=64;
int height=64;
// Filled in by cudaMallocPitch: the byte distance between the starts of
// consecutive rows (the kernel uses it as a byte stride).
size_t pitch;
// Filled in by cudaMallocPitch: base device address of the 2D allocation.
float *devPtr;
// The requested row width is given in bytes, the row count in rows.
cudaMallocPitch(&devPtr,&pitch,sizeof(float)*width,height);
// Launch 100 blocks of 512 threads; the kernel receives the pitch so it can
// locate each row.
MyKernel<<<100,512>>>(devPtr,pitch,width,height);
//Device code
// Visits every element of a pitched 2D float array.
//
//   devPtr - base address of the allocation (obtained from cudaMallocPitch
//            in the host code)
//   pitch  - distance in BYTES between the starts of consecutive rows
//   width  - number of float elements per row
//   height - number of rows
//
// The loops do not depend on threadIdx/blockIdx, so every thread that runs
// this kernel walks the whole array.
__global__ void MyKernel(float *devPtr,size_t pitch,int width,int height){
    for(int r=0;r<height;++r){
        // pitch is a byte count, so the row offset is applied on a char*
        // before casting back to float*.
        float *row=(float*)((char*)devPtr+r*pitch);
        for(int c=0;c<width;++c){
            // Read only to demonstrate the indexing; the value is not used.
            float element=row[c];
        }
    }
}
下面的代码分配了一个尺寸为 width×height×depth 的三维浮点数组,同时演示了怎样在设备代码中遍历数组元素。
//Host code
// Array dimensions, counted in float elements.
int width=64;
int height=64;
int depth=64;
// Filled in by cudaMalloc3D; the kernel reads its .ptr and .pitch members.
cudaPitchedPtr devPitchedPtr;
// The extent's first component is the row width in bytes; the other two are
// plain counts (rows, slices).
cudaExtent extent=make_cudaExtent(sizeof(float)*width,height,depth);
cudaMalloc3D(&devPitchedPtr,extent);
// Launch 100 blocks of 512 threads over the pitched 3D allocation.
MyKernel<<<100,512>>>(devPitchedPtr,width,height,depth);
//Device code
// Visits every element of a pitched 3D float array.
//
//   devPitchedPtr - descriptor of the allocation (obtained from cudaMalloc3D
//                   in the host code); .ptr is the base address and .pitch
//                   the distance in BYTES between consecutive rows
//   width         - number of float elements per row
//   height        - number of rows per slice
//   depth         - number of slices
//
// The loops do not depend on threadIdx/blockIdx, so every thread that runs
// this kernel walks the whole array.
__global__ void MyKernel(cudaPitchedPtr devPitchedPtr,int width,int height,int depth){
    // cudaPitchedPtr::ptr is a void*; C++ needs an explicit cast to char*
    // so that the offsets below are applied in bytes.
    char* devPtr=(char*)devPitchedPtr.ptr;
    size_t pitch=devPitchedPtr.pitch;
    // One slice is `height` rows of `pitch` bytes each.
    size_t slicePitch=pitch*height;
    for(int z=0;z<depth;++z){
        char* slice=devPtr+z*slicePitch;
        for(int y=0;y<height;++y){
            float* row=(float*)(slice+y*pitch);
            for(int x=0;x<width;++x){
                // Read only to demonstrate the indexing; the value is not used.
                float element=row[x];
            }
        }
    }
}