CUDA, Day 2: 2D Array Operations
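The program below launches the same index-dumping kernel with two different two-dimensional launch configurations and records, for every thread, the block and thread indices and the grid/block dimensions it sees, so the two layouts can be compared line by line in the output.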

#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdlib.h>
using namespace std;


__global__ void func2(
int * block_x,
int * block_y,
int * thread,
int * warp,
int * cale_thread,
int * xthread,
int * ythread,
int * grid_dimx,
int * grid_dimy,
int * block_dimx,
int * block_dimy);


#define ARRAY_SIZE_X 32
#define ARRAY_SIZE_Y 16


#define ARRAY_SIZE_IN_BYTES ((ARRAY_SIZE_X)*(ARRAY_SIZE_Y)*(sizeof(int)))


/* Host copies of the per-thread records. They are indexed as [y][x]
   when printing, so the first (row) dimension is ARRAY_SIZE_Y and the
   second (column) dimension is ARRAY_SIZE_X, matching the kernel's
   linear index thread_idx = (gridDim.x * blockDim.x) * idy + idx. */
int cpu_block_x[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_block_y[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_warp[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_cale_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_xthread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_ythread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_grid_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_grid_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_block_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
int cpu_block_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];


int main(void)
{
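/* Two launch configurations that both cover ARRAY_SIZE_X x ARRAY_SIZE_Y
   = 32 x 16 = 512 threads:
   "rect":   a 1 x 4 grid of 32 x 4 thread blocks,
   "square": a 2 x 2 grid of 16 x 8 thread blocks. */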
dim3 threads_rect(32, 4);
dim3 blocks_rect(1, 4);
dim3 threads_square(16, 8);
dim3 blocks_square(2, 2);


int * gpu_block_x;
int * gpu_block_y;
int * gpu_thread;
int * gpu_warp;
int * gpu_cale_thread;
int * gpu_xthread;
int * gpu_ythread;
int * gpu_grid_dimx;
int * gpu_grid_dimy;
int * gpu_block_dimx;
int * gpu_block_dimy;


cudaMalloc((void **)& gpu_block_x, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_block_y, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_thread, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_warp, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_cale_thread, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_xthread, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_ythread, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_grid_dimx, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_grid_dimy, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_block_dimx, ARRAY_SIZE_IN_BYTES);
cudaMalloc((void **)& gpu_block_dimy, ARRAY_SIZE_IN_BYTES);


/* Launch the kernel once with each configuration, copy the per-thread
   records back to the host and print one line per thread. */
for (int kernel = 0; kernel < 2; kernel++)
{
switch (kernel)
{
case 0:
{
/* rectangular layout */
func2 <<<blocks_rect, threads_rect>>>(gpu_block_x,
 gpu_block_y,
 gpu_thread,
 gpu_warp,
 gpu_cale_thread,
 gpu_xthread,
 gpu_ythread,
 gpu_grid_dimx,
 gpu_grid_dimy,
 gpu_block_dimx,
 gpu_block_dimy);
}break;


case 1:
{
/* square layout */
func2 <<<blocks_square, threads_square>>>(gpu_block_x,
 gpu_block_y,
 gpu_thread,
 gpu_warp,
 gpu_cale_thread,
 gpu_xthread,
 gpu_ythread,
 gpu_grid_dimx,
 gpu_grid_dimy,
 gpu_block_dimx,
 gpu_block_dimy);
}break;


default:exit(1); break;
}


cudaMemcpy(cpu_block_x, gpu_block_x, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_block_y, gpu_block_y, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_thread, gpu_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_warp, gpu_warp, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_cale_thread, gpu_cale_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_xthread, gpu_xthread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_ythread, gpu_ythread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_grid_dimx, gpu_grid_dimx, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_grid_dimy, gpu_grid_dimy, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_block_dimx, gpu_block_dimx, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
cudaMemcpy(cpu_block_dimy, gpu_block_dimy, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);


printf("\nkernel %d\n", kernel);


for (int y = 0; y < ARRAY_SIZE_Y; y++)
{
for (int x = 0; x < ARRAY_SIZE_X; x++)
{
printf("CT:%2u BKX: %1u BKY: %1u TID: %2u YTID: %2u XTID: %2u GDX: %1u BDX: %1u GDY %1u BDY %1u\n",
cpu_cale_thread[y][x],
cpu_block_x[y][x],
cpu_block_y[y][x],
cpu_thread[y][x],
cpu_ythread[y][x],
cpu_xthread[y][x],
cpu_grid_dimx[y][x],
cpu_block_dimx[y][x],
cpu_grid_dimy[y][x],
cpu_block_dimy[y][x]);
//cin.get();
}
}
printf("press any key to continue\n");
cin.get();
}


/* release the device buffers */
cudaFree(gpu_block_x);
cudaFree(gpu_block_y);
cudaFree(gpu_thread);
cudaFree(gpu_warp);
cudaFree(gpu_cale_thread);
cudaFree(gpu_xthread);
cudaFree(gpu_ythread);
cudaFree(gpu_grid_dimx);
cudaFree(gpu_grid_dimy);
cudaFree(gpu_block_dimx);
cudaFree(gpu_block_dimy);


return 0;
}


__global__ void func2(
int * block_x,
int * block_y,
int * thread,
int * warp,
int * cale_thread,
int * xthread,
int * ythread,
int * grid_dimx,
int * grid_dimy,
int * block_dimx,
int * block_dimy)
{
/* 2D global thread coordinates */
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int idy = (blockIdx.y * blockDim.y) + threadIdx.y;
/* linearised index: row (idy) times the grid-wide row width, plus the column (idx) */
int thread_idx = ((gridDim.x * blockDim.x) * idy) + idx;


block_x[thread_idx] = blockIdx.x;
block_y[thread_idx] = blockIdx.y;
thread[thread_idx] = threadIdx.x;
warp[thread_idx] = ((threadIdx.y * blockDim.x) + threadIdx.x) / warpSize; /* warp number within the block */
cale_thread[thread_idx] = thread_idx;
xthread[thread_idx] = idx;
ythread[thread_idx] = idy;
grid_dimx[thread_idx] = gridDim.x;
grid_dimy[thread_idx] = gridDim.y;
block_dimx[thread_idx] = blockDim.x;
block_dimy[thread_idx] = blockDim.y;
}
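As a worked check of the index arithmetic (using only values from the code above): with the rectangular launch, gridDim = (1, 4) and blockDim = (32, 4), so a thread with blockIdx = (0, 3) and threadIdx = (5, 2) gets idx = 0*32 + 5 = 5, idy = 3*4 + 2 = 14, and thread_idx = (1*32)*14 + 5 = 453. The square launch maps the same 512 array slots onto a 2 x 2 grid of 16 x 8 blocks, so the slot at (x, y) = (5, 14) is instead written by the thread with blockIdx = (0, 1) and threadIdx = (5, 6); comparing the two printed tables shows exactly this difference in how blocks and threads tile the array.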