#include "head.h"
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demonstrates a round trip through pitched device memory:
//   1. fills a 5x5 host matrix with 0..24,
//   2. copies it into a cudaMallocPitch allocation with cudaMemcpy2D,
//   3. launches the `gpu` kernel with a single thread on that allocation,
//   4. copies the matrix back and prints it row by row on stdout.
// Every CUDA call is checked; after the first failure the remaining steps are
// skipped, the error is reported on stderr, and both buffers are still released.
void gpuInit()
{
    const size_t width = 5;
    const size_t height = 5;
    const size_t rowBytes = sizeof(float) * width;  // tightly packed host row
    const size_t count = width * height;
    float *h_data = NULL;
    float *d_data = NULL;
    size_t pitch = 0;  // device row stride in bytes, set by cudaMallocPitch

    if (!cudaCallOk(cudaSetDevice(0), "cudaSetDevice"))
    {
        return;
    }

    h_data = (float*)malloc(rowBytes * height);
    if (h_data == NULL)
    {
        std::cerr << "host allocation failed" << std::endl;
        return;
    }
    for (size_t i = 0; i < count; ++i)
    {
        h_data[i] = (float)i;
    }

    std::cout << "Allow memory" << std::endl;
    bool ok = cudaCallOk(cudaMallocPitch((void**)&d_data, &pitch, rowBytes, height),
                         "cudaMallocPitch");

    if (ok)
    {
        std::cout << "copy to device" << std::endl;
        // Host rows are rowBytes apart, device rows are pitch bytes apart.
        ok = cudaCallOk(cudaMemcpy2D(d_data, pitch, h_data, rowBytes,
                                     rowBytes, height, cudaMemcpyHostToDevice),
                        "cudaMemcpy2D (host to device)");
    }

    if (ok)
    {
        gpu<<< 1, 1>>>(d_data, (int)height, (int)width, (int)pitch);
        // A launch does not return a status; a bad launch only shows up here.
        ok = cudaCallOk(cudaGetLastError(), "gpu kernel launch");
    }

    if (ok)
    {
        // Errors raised while the kernel runs are reported by the synchronize.
        ok = cudaCallOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    if (ok)
    {
        std::cout << "copy to Host" << std::endl;
        ok = cudaCallOk(cudaMemcpy2D(h_data, rowBytes, d_data, pitch,
                                     rowBytes, height, cudaMemcpyDeviceToHost),
                        "cudaMemcpy2D (device to host)");
    }

    if (ok)
    {
        for (size_t i = 0; i < count; ++i)
        {
            std::cout << h_data[i] << " ";
            if ((i + 1) % width == 0)
            {
                std::cout << std::endl;
            }
        }
    }

    free(h_data);
    cudaFree(d_data);  // d_data is still NULL if the allocation failed; freeing NULL is a no-op
}
// Debug kernel: prints every element of a pitched 2D float matrix and then
// increments it by one.
//
//   devPtr - base address of the matrix
//   height - number of rows
//   width  - number of float elements per row
//   pitch  - distance in bytes between the starts of consecutive rows
//
// Rows are distributed over threads with a grid-stride loop along the x
// dimension, so each row is handled by exactly one thread for any 1D launch
// configuration. With <<<1, 1>>> the single thread walks all rows in order,
// so the printed output forms the matrix layout; with more threads the lines
// printed for different rows may interleave.
__global__ void gpu(float *devPtr,int height,int width,int pitch)
{
    const int firstRow = blockIdx.x * blockDim.x + threadIdx.x;
    const int rowStride = gridDim.x * blockDim.x;

    for (int row = firstRow; row < height; row += rowStride)
    {
        // Byte offset is computed in size_t so row * pitch cannot overflow int.
        float *rowHead = (float*)((char*)devPtr + (size_t)row * (size_t)pitch);
        for (int col = 0; col < width; ++col)
        {
            printf("\t%f", rowHead[col]);
            rowHead[col]++;
        }
        printf("\n");
    }
}