CUDA内存拷贝

分享一下我老师大神的人工智能教程!零基础,通俗易懂!http://blog.csdn.net/jiangjunshow

也欢迎大家转载本篇文章。分享知识,造福人民,实现我们中华民族伟大复兴!

               

http://www.cnblogs.com/traceorigin/archive/2013/04/12/3016568.html

CUDA内存拷贝

1、cudaMemcpy()<--> cudaMalloc()  //线性内存拷贝

// Linear memory copy: allocate a device buffer, then upload the host data into it.
// dev_A (device pointer), host_A (host pointer) and data_size (size in bytes) are
// expected to be declared by the surrounding code.
// NOTE(review): both calls return cudaError_t; production code should check it.
cudaMalloc((void**)&dev_A, data_size);
cudaMemcpy(dev_A, host_A, data_size, cudaMemcpyHostToDevice);

2、cudaMemcpy2D()<-->cudaMallocPitch() //二维线性内存(按 pitch 对齐的行)拷贝

// Prototype of cudaMemcpy2D as declared by the CUDA runtime API.
// Copies `height` rows of `width` bytes each from src to dst; dpitch and spitch are the
// byte distances between the starts of consecutive rows in dst and src respectively,
// and kind selects the copy direction.
cudaError_t cudaMemcpy2D(        void *     dst,    size_t     dpitch,    const void *     src,    size_t     spitch,    size_t     width,    size_t     height,    enum cudaMemcpyKind     kind     )    

例:

// Pitched 2D allocation: the runtime picks `pitch`, the byte stride between device rows,
// which may be larger than width * sizeof(float).
cudaMallocPitch((void**)&devPtr, &pitch, width * sizeof(float), height);
// Upload `height` rows of `width` floats. The host rows are assumed to be tightly packed
// (stride width * sizeof(float)); the device rows are `pitch` bytes apart.
// hostPtr is a placeholder for the caller's host buffer.
cudaMemcpy2D(devPtr, pitch, hostPtr, width * sizeof(float), width * sizeof(float), height, cudaMemcpyHostToDevice);

3、cudaMemcpy2DToArray()<-->cudaMallocArray() //(二维)线性内存到2维数组的拷贝

 // Prototype of cudaMemcpy2DToArray as declared by the CUDA runtime API.
 // Copies a region of `height` rows, `width` bytes per row, from linear memory `src`
 // (consecutive rows are `spitch` bytes apart) into the CUDA array `dst`, starting at
 // offset (wOffset, hOffset) inside the array. `kind` selects the copy direction.
 cudaError_t cudaMemcpy2DToArray(
     struct cudaArray *dst,
     size_t wOffset,
     size_t hOffset,
     const void *src,
     size_t spitch,
     size_t width,
     size_t height,
     enum cudaMemcpyKind kind
 );

例:

 // Host-side driver that uploads a matrix and a vector, launches mv_kernel, and downloads
 // the result.
 //
 //   y : host output buffer, receives m floats
 //   A : host input matrix, read as m rows of n floats (row stride n * sizeof(float))
 //   x : host input vector, n floats
 //   m : number of rows of A / length of y
 //   n : number of columns of A / length of x
 //
 // Presumably computes y = A * x; the arithmetic lives in mv_kernel, which is not shown.
 // Relies on the texture reference texRefA and on mv_kernel being declared elsewhere.
 //
 // NOTE(review): none of the CUDA return codes are checked, and the kernel launch is not
 // followed by cudaGetLastError(); failures pass silently.
 void mv(float *y, float *A, float *x, int m, int n)
 {
     // Number of 16-row tiles needed to cover m rows, i.e. ceil(m / 16).
     int blkNum = (m >> 4) + ((m & 15) ? 1 : 0);
     // Row count rounded up to a multiple of 16.
     int height = blkNum << 4;
     // Column count rounded up to a multiple of 256.
     int width = (n & 255) ? (((n >> 8) + 1) << 8) : n;
     dim3 threads(16, 16);
     dim3 grid(blkNum, 1);
     cudaArray *d_A;
     float *d_x, *d_y;

     // The array stores float4 texels, so each row holds width / 4 elements.
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
     cudaMallocArray(&d_A, &channelDesc, width >> 2, height);
     // Only the real m x n region is uploaded; the padding rows/columns of the array are
     // left uninitialised.
     // NOTE(review): n * sizeof(float) is not a multiple of sizeof(float4) unless n is a
     // multiple of 4 — confirm callers guarantee that.
     cudaMemcpy2DToArray(d_A, 0, 0, A, n * sizeof(float), n * sizeof(float), m, cudaMemcpyHostToDevice);
     cudaBindTextureToArray(texRefA, d_A);
     cudaMalloc((void **) &d_x, n * sizeof(float));
     cudaMalloc((void **) &d_y, m * sizeof(float));

     cudaMemcpy(d_x, x, n * sizeof(float), cudaMemcpyHostToDevice);
     mv_kernel<<< grid, threads >>>(d_y, d_A, d_x, m, n);
     // This blocking copy on the default stream also waits for the kernel to finish.
     cudaMemcpy(y, d_y, m * sizeof(float), cudaMemcpyDeviceToHost);

     // Release device resources; the texture is unbound before its backing array is freed.
     cudaFree(d_y);
     cudaFree(d_x);
     cudaUnbindTexture(texRefA);
     cudaFreeArray(d_A);
 }

4、cudaMemcpyToArray()<-->cudaMallocArray()  //(1维)线性内存到2维数组的拷贝

// Prototype of cudaMemcpyToArray as declared by the CUDA runtime API.
// Copies `count` bytes from linear memory `src` into the CUDA array `dst`, starting at
// offset (wOffset, hOffset) inside the array. `kind` selects the copy direction.
// NOTE(review): recent CUDA toolkits mark this function deprecated in favour of
// cudaMemcpy2DToArray — confirm against the toolkit version in use.
cudaError_t cudaMemcpyToArray(
    struct cudaArray *dst,
    size_t wOffset,
    size_t hOffset,
    const void *src,
    size_t count,
    enum cudaMemcpyKind kind
);

例:

 // Uploads a 128 x 128 float image into the CUDA array d_volumeArray and binds it to the
 // texture reference `tex`.
 //
 //   h_volume : host buffer holding 128 * 128 floats
 //   velocity : not used by this function
 //
 // d_volumeArray and tex are expected to be declared at file scope elsewhere.
 void initCudaTexture(float *h_volume, float2 *velocity)
 {
     // Single-channel 32-bit float texels.
     cudaChannelFormatDesc desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

     cutilSafeCall(cudaMallocArray(&d_volumeArray, &desc, 128, 128));

     // h_volume is host memory, so the direction must be host-to-device
     // (the copy kind was previously cudaMemcpyDeviceToDevice).
     cutilSafeCall(cudaMemcpyToArray(d_volumeArray, 0, 0, h_volume, sizeof(float)*128*128, cudaMemcpyHostToDevice));

     // Normalised coordinates, linear filtering, wrap-around addressing on both axes.
     tex.normalized = true;
     tex.filterMode = cudaFilterModeLinear;
     tex.addressMode[0] = cudaAddressModeWrap;
     tex.addressMode[1] = cudaAddressModeWrap;

     cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray));
 }

5、cudaMemcpy3D()<-->cudaMalloc3DArray() //(1维)线性内存到3维数组的拷贝 

 // Declarations quoted from the CUDA runtime API for 3D copies.

 // Performs the 3D copy described by *p.
 cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms *p);

 // Size of a 3D region.
 struct cudaExtent {
   size_t width;
   size_t height;
   size_t depth;
 };
 struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d);

 // Position (offset) inside a 3D region.
 struct cudaPos {
   size_t x;
   size_t y;
   size_t z;
 };
 struct cudaPos make_cudaPos(size_t x, size_t y, size_t z);

 // Full description of a 3D copy. Per the CUDA runtime documentation, the source is given
 // by either srcArray or srcPtr and the destination by either dstArray or dstPtr.
 struct cudaMemcpy3DParms {
   struct cudaArray     *srcArray;  // source CUDA array
   struct cudaPos        srcPos;    // starting offset within the source
   struct cudaPitchedPtr srcPtr;    // source pitched linear memory
   struct cudaArray     *dstArray;  // destination CUDA array
   struct cudaPos        dstPos;    // starting offset within the destination
   struct cudaPitchedPtr dstPtr;    // destination pitched linear memory
   struct cudaExtent     extent;    // size of the region to copy
   enum cudaMemcpyKind   kind;      // copy direction
 };

 例: 

 // Uploads a host volume into the 3D CUDA array d_volumeArray and binds it to the texture
 // reference `tex`.
 //
 //   h_volume   : host buffer of uchar voxels, read as tightly packed rows of
 //                volumeSize.width elements
 //   volumeSize : extent of the volume (width, height, depth)
 //
 // d_volumeArray and tex are expected to be declared at file scope elsewhere.
 void initCudaTexture(const uchar *h_volume, cudaExtent volumeSize)
 {
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();

     cutilSafeCall(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));

     // Describe the copy: zero-initialise every field, then fill in only what is needed.
     cudaMemcpy3DParms copyParams = {0};
     // Wrap the host buffer as a pitched pointer; the pitch equals the packed row size.
     copyParams.srcPtr = make_cudaPitchedPtr((void*)h_volume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);
     copyParams.dstArray = d_volumeArray;
     copyParams.extent   = volumeSize;
     copyParams.kind     = cudaMemcpyHostToDevice;
     cutilSafeCall(cudaMemcpy3D(&copyParams));

     // Normalised coordinates, linear filtering, wrap-around addressing on all three axes.
     tex.normalized = true;
     tex.filterMode = cudaFilterModeLinear;
     tex.addressMode[0] = cudaAddressModeWrap;
     tex.addressMode[1] = cudaAddressModeWrap;
     tex.addressMode[2] = cudaAddressModeWrap;

     cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
 }

6、cudaMemcpyToSymbol() / cudaMemcpyFromSymbol()  //与设备端符号之间的拷贝(__constant__ 常量存储器或 __device__ 全局变量)

// Copies between host memory and device symbols. The __constant__ / __device__
// declarations belong at file scope; the remaining statements belong in host code.

// 1) Round trip through a constant-memory array.
__constant__ float constData[256];
float data[256];
cudaMemcpyToSymbol(constData, data, sizeof(data));
cudaMemcpyFromSymbol(data, constData, sizeof(data));

// 2) Set a single __device__ variable from a host value.
__device__ float devData;
float value = 3.14f;
cudaMemcpyToSymbol(devData, &value, sizeof(float));

// 3) Store the address of a cudaMalloc'ed buffer in a __device__ pointer.
//    sizeof(ptr) is intentional: the pointer value itself is copied, not the buffer.
__device__ float* devPointer;
float* ptr;
cudaMalloc(&ptr, 256 * sizeof(float));
cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr));

           

给我老师的人工智能教程打call!http://blog.csdn.net/jiangjunshow
这里写图片描述
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值