CUDA: launching a kernel with user-defined dim3 grid and block dimensions

#include<stdio.h>
#include<stdlib.h>
#include<conio.h>

// Shorthand type aliases used by the kernel and the host code below.
// uipc: constant pointer to a writable unsigned int (the pointer itself
//       cannot be reassigned; the data it points to can be written).
typedef unsigned int * const uipc;
// cui: read-only unsigned int value.
typedef const unsigned int cui;
// ui: plain unsigned int.
typedef unsigned int ui;

/*
 * Records, for every thread of a 2D launch, the built-in index and dimension
 * values that thread observes.
 *
 * Each thread derives its absolute x/y coordinate within the whole grid,
 * flattens that coordinate row-major (row width = gridDim.x * blockDim.x),
 * and writes exactly one element at that flat position in each output array:
 *
 *   block_x / block_y       - blockIdx.x / blockIdx.y
 *   thread                  - threadIdx.x (x component only)
 *   calc_thread             - the flat position itself
 *   x_thread / y_thread     - absolute x / y coordinate within the grid
 *   grid_dimx / grid_dimy   - gridDim.x / gridDim.y
 *   block_dimx / block_dimy - blockDim.x / blockDim.y
 *
 * There is no bounds check: every array must provide at least
 * gridDim.x * blockDim.x * gridDim.y * blockDim.y elements.
 */
__global__ void what_is_my_id_2d_A(
    uipc block_x,
    uipc block_y,
    uipc thread,
    uipc calc_thread,
    uipc x_thread,
    uipc y_thread,
    uipc grid_dimx,
    uipc block_dimx,
    uipc grid_dimy,
    uipc block_dimy
)
{
    // Absolute coordinates of this thread within the whole grid.
    cui col = threadIdx.x + (blockDim.x * blockIdx.x);
    cui row = threadIdx.y + (blockDim.y * blockIdx.y);

    // Number of threads in one full row of the grid.
    cui row_width = blockDim.x * gridDim.x;

    // Row-major flat position; used as the index into every output array.
    cui slot = col + (row * row_width);

    // Where this thread sits.
    calc_thread[slot] = slot;
    x_thread[slot]    = col;
    y_thread[slot]    = row;
    thread[slot]      = threadIdx.x;
    block_x[slot]     = blockIdx.x;
    block_y[slot]     = blockIdx.y;

    // Launch configuration as seen from inside the kernel.
    grid_dimx[slot]   = gridDim.x;
    block_dimx[slot]  = blockDim.x;
    grid_dimy[slot]   = gridDim.y;
    block_dimy[slot]  = blockDim.y;
}

// Dimensions of the host-side result tables: ARRAY_SIZE_X columns by
// ARRAY_SIZE_Y rows (32 * 16 = 512 entries per table).
#define ARRAY_SIZE_X 32
#define ARRAY_SIZE_Y 16
// Size in bytes of one complete table; used for every device allocation and
// every device-to-host copy in main().
#define ARRAY_SIZE_IN_BYTES ((ARRAY_SIZE_X)*(ARRAY_SIZE_Y)*(sizeof(unsigned int)))

// Host-side destinations for the values written by what_is_my_id_2d_A,
// indexed as [y][x]. One table per kernel output array.
ui cpu_block_x[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_block_y[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
// NOTE(review): cpu_warp is never read or written by the code visible in this
// file - confirm whether it can be removed.
ui cpu_warp[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_calc_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_xthread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_ythread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_grid_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_block_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_grid_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_block_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];


// Terminates the program with a diagnostic on stderr when a CUDA runtime call
// reports anything other than cudaSuccess.
static void check_cuda_call(const cudaError_t status, const char * const what)
{
    if(status != cudaSuccess){
        fprintf(stderr,"CUDA error (%s): %s\n",what,cudaGetErrorString(status));
        exit(1);
    }
}

/*
 * Launches what_is_my_id_2d_A twice - once with a 1x4 grid of 32x4 blocks and
 * once with a 2x2 grid of 16x8 blocks - and prints, for every thread, the
 * indices and dimensions the kernel recorded. Both configurations contain
 * exactly ARRAY_SIZE_X * ARRAY_SIZE_Y (512) threads, matching the size of the
 * device buffers and host tables.
 *
 * Returns 0 on success; exits with status 1 if any CUDA call fails.
 */
int main(){

    //the following x : width ; the following y : height;
    //block x:32 y:4
    const dim3 threads_rect(32,4);
    //grid x:1 y:4
    const dim3 blocks_rect(1,4);

    //block x:16 y:8
    const dim3 threads_square(16,8);
    //grid x:2 y:2
    const dim3 blocks_square(2,2);

    ui * gpu_block_x = NULL;
    ui * gpu_block_y = NULL;
    ui * gpu_thread = NULL;
    ui * gpu_calc_thread = NULL;
    ui * gpu_xthread = NULL;
    ui * gpu_ythread = NULL;
    ui * gpu_grid_dimx = NULL;
    ui * gpu_block_dimx = NULL;
    ui * gpu_grid_dimy = NULL;
    ui * gpu_block_dimy = NULL;

    //allocate gpu memory (one buffer per kernel output array)
    check_cuda_call(cudaMalloc((void**)&gpu_block_x,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_block_x");
    check_cuda_call(cudaMalloc((void**)&gpu_block_y,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_block_y");
    check_cuda_call(cudaMalloc((void**)&gpu_thread,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_thread");
    check_cuda_call(cudaMalloc((void**)&gpu_calc_thread,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_calc_thread");
    check_cuda_call(cudaMalloc((void**)&gpu_xthread,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_xthread");
    check_cuda_call(cudaMalloc((void**)&gpu_ythread,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_ythread");
    check_cuda_call(cudaMalloc((void**)&gpu_grid_dimx,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_grid_dimx");
    check_cuda_call(cudaMalloc((void**)&gpu_block_dimx,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_block_dimx");
    check_cuda_call(cudaMalloc((void**)&gpu_grid_dimy,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_grid_dimy");
    check_cuda_call(cudaMalloc((void**)&gpu_block_dimy,ARRAY_SIZE_IN_BYTES),"cudaMalloc gpu_block_dimy");

    for(int kernel = 0;kernel < 2;kernel++){
        //kernel 0 uses the rectangular layout, kernel 1 the square one
        const dim3 &grid  = (kernel == 0) ? blocks_rect  : blocks_square;
        const dim3 &block = (kernel == 0) ? threads_rect : threads_square;

        what_is_my_id_2d_A<<<grid,block>>>(gpu_block_x,
                gpu_block_y,
                gpu_thread,
                gpu_calc_thread,
                gpu_xthread,
                gpu_ythread,
                gpu_grid_dimx,
                gpu_block_dimx,
                gpu_grid_dimy,
                gpu_block_dimy
                );
        //a launch does not return a status; an invalid configuration is
        //only reported through cudaGetLastError()
        check_cuda_call(cudaGetLastError(),"what_is_my_id_2d_A launch");

        //copy result from gpu memory to cpu memory
        //(errors raised while the kernel ran are reported by these copies)
        check_cuda_call(cudaMemcpy(cpu_block_x,gpu_block_x,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy block_x");
        check_cuda_call(cudaMemcpy(cpu_block_y,gpu_block_y,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy block_y");
        check_cuda_call(cudaMemcpy(cpu_thread,gpu_thread,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy thread");
        check_cuda_call(cudaMemcpy(cpu_calc_thread,gpu_calc_thread,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy calc_thread");
        check_cuda_call(cudaMemcpy(cpu_xthread,gpu_xthread,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy xthread");
        check_cuda_call(cudaMemcpy(cpu_ythread,gpu_ythread,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy ythread");
        check_cuda_call(cudaMemcpy(cpu_grid_dimx,gpu_grid_dimx,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy grid_dimx");
        check_cuda_call(cudaMemcpy(cpu_grid_dimy,gpu_grid_dimy,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy grid_dimy");
        check_cuda_call(cudaMemcpy(cpu_block_dimx,gpu_block_dimx,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy block_dimx");
        check_cuda_call(cudaMemcpy(cpu_block_dimy,gpu_block_dimy,ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost),"cudaMemcpy block_dimy");

        printf("\nkernel %d\n",kernel);

        for(int y=0;y<ARRAY_SIZE_Y;y++){
            for(int x=0;x<ARRAY_SIZE_X;x++){
                //the last column is the block's y dimension, so it is labelled BDY
                printf("CT:%2u BKX:%1u BKY:%1u TID:%2u YTID:%2u XTID:%2u GDX:%1u BDX:%1u GDY:%1u BDY:%1u\n",
                    cpu_calc_thread[y][x],
                    cpu_block_x[y][x],
                    cpu_block_y[y][x],
                    cpu_thread[y][x],
                    cpu_ythread[y][x],
                    cpu_xthread[y][x],
                    cpu_grid_dimx[y][x],
                    cpu_block_dimx[y][x],
                    cpu_grid_dimy[y][x],
                    cpu_block_dimy[y][x]
                    );
                //wait for a key after every line so the output can be stepped through
                getch();
            }
            printf("press any key to continue\n");
            getch();
        }
    }

    //release the device buffers
    check_cuda_call(cudaFree(gpu_block_x),"cudaFree gpu_block_x");
    check_cuda_call(cudaFree(gpu_block_y),"cudaFree gpu_block_y");
    check_cuda_call(cudaFree(gpu_thread),"cudaFree gpu_thread");
    check_cuda_call(cudaFree(gpu_calc_thread),"cudaFree gpu_calc_thread");
    check_cuda_call(cudaFree(gpu_xthread),"cudaFree gpu_xthread");
    check_cuda_call(cudaFree(gpu_ythread),"cudaFree gpu_ythread");
    check_cuda_call(cudaFree(gpu_grid_dimx),"cudaFree gpu_grid_dimx");
    check_cuda_call(cudaFree(gpu_grid_dimy),"cudaFree gpu_grid_dimy");
    check_cuda_call(cudaFree(gpu_block_dimx),"cudaFree gpu_block_dimx");
    check_cuda_call(cudaFree(gpu_block_dimy),"cudaFree gpu_block_dimy");
    return 0;

}


kernel 0
CT: 0 BKX:0 BKY:0 TID: 0 YTID: 0 XTID: 0 GDX:1 BDX:32 GDY:4 BDX:4
CT: 1 BKX:0 BKY:0 TID: 1 YTID: 0 XTID: 1 GDX:1 BDX:32 GDY:4 BDX:4
CT: 2 BKX:0 BKY:0 TID: 2 YTID: 0 XTID: 2 GDX:1 BDX:32 GDY:4 BDX:4
CT: 3 BKX:0 BKY:0 TID: 3 YTID: 0 XTID: 3 GDX:1 BDX:32 GDY:4 BDX:4
CT: 4 BKX:0 BKY:0 TID: 4 YTID: 0 XTID: 4 GDX:1 BDX:32 GDY:4 BDX:4
CT: 5 BKX:0 BKY:0 TID: 5 YTID: 0 XTID: 5 GDX:1 BDX:32 GDY:4 BDX:4
CT: 6 BKX:0 BKY:0 TID: 6 YTID: 0 XTID: 6 GDX:1 BDX:32 GDY:4 BDX:4
CT: 7 BKX:0 BKY:0 TID: 7 YTID: 0 XTID: 7 GDX:1 BDX:32 GDY:4 BDX:4
CT: 8 BKX:0 BKY:0 TID: 8 YTID: 0 XTID: 8 GDX:1 BDX:32 GDY:4 BDX:4
CT: 9 BKX:0 BKY:0 TID: 9 YTID: 0 XTID: 9 GDX:1 BDX:32 GDY:4 BDX:4
CT:10 BKX:0 BKY:0 TID:10 YTID: 0 XTID:10 GDX:1 BDX:32 GDY:4 BDX:4
CT:11 BKX:0 BKY:0 TID:11 YTID: 0 XTID:11 GDX:1 BDX:32 GDY:4 BDX:4
CT:12 BKX:0 BKY:0 TID:12 YTID: 0 XTID:12 GDX:1 BDX:32 GDY:4 BDX:4
CT:13 BKX:0 BKY:0 TID:13 YTID: 0 XTID:13 GDX:1 BDX:32 GDY:4 BDX:4
CT:14 BKX:0 BKY:0 TID:14 YTID: 0 XTID:14 GDX:1 BDX:32 GDY:4 BDX:4
CT:15 BKX:0 BKY:0 TID:15 YTID: 0 XTID:15 GDX:1 BDX:32 GDY:4 BDX:4
CT:16 BKX:0 BKY:0 TID:16 YTID: 0 XTID:16 GDX:1 BDX:32 GDY:4 BDX:4
CT:17 BKX:0 BKY:0 TID:17 YTID: 0 XTID:17 GDX:1 BDX:32 GDY:4 BDX:4
CT:18 BKX:0 BKY:0 TID:18 YTID: 0 XTID:18 GDX:1 BDX:32 GDY:4 BDX:4
CT:19 BKX:0 BKY:0 TID:19 YTID: 0 XTID:19 GDX:1 BDX:32 GDY:4 BDX:4
CT:20 BKX:0 BKY:0 TID:20 YTID: 0 XTID:20 GDX:1 BDX:32 GDY:4 BDX:4
CT:21 BKX:0 BKY:0 TID:21 YTID: 0 XTID:21 GDX:1 BDX:32 GDY:4 BDX:4
CT:22 BKX:0 BKY:0 TID:22 YTID: 0 XTID:22 GDX:1 BDX:32 GDY:4 BDX:4
CT:23 BKX:0 BKY:0 TID:23 YTID: 0 XTID:23 GDX:1 BDX:32 GDY:4 BDX:4
CT:24 BKX:0 BKY:0 TID:24 YTID: 0 XTID:24 GDX:1 BDX:32 GDY:4 BDX:4
CT:25 BKX:0 BKY:0 TID:25 YTID: 0 XTID:25 GDX:1 BDX:32 GDY:4 BDX:4
CT:26 BKX:0 BKY:0 TID:26 YTID: 0 XTID:26 GDX:1 BDX:32 GDY:4 BDX:4
CT:27 BKX:0 BKY:0 TID:27 YTID: 0 XTID:27 GDX:1 BDX:32 GDY:4 BDX:4
CT:28 BKX:0 BKY:0 TID:28 YTID: 0 XTID:28 GDX:1 BDX:32 GDY:4 BDX:4
CT:29 BKX:0 BKY:0 TID:29 YTID: 0 XTID:29 GDX:1 BDX:32 GDY:4 BDX:4
CT:30 BKX:0 BKY:0 TID:30 YTID: 0 XTID:30 GDX:1 BDX:32 GDY:4 BDX:4
CT:31 BKX:0 BKY:0 TID:31 YTID: 0 XTID:31 GDX:1 BDX:32 GDY:4 BDX:4
press any key to continue
CT:32 BKX:0 BKY:0 TID: 0 YTID: 1 XTID: 0 GDX:1 BDX:32 GDY:4 BDX:4
CT:33 BKX:0 BKY:0 TID: 1 YTID: 1 XTID: 1 GDX:1 BDX:32 GDY:4 BDX:4

...................... (remaining kernel 0 output and the beginning of kernel 1 output omitted)

CT:470 BKX:1 BKY:1 TID: 6 YTID:14 XTID:22 GDX:2 BDX:16 GDY:2 BDX:8
CT:471 BKX:1 BKY:1 TID: 7 YTID:14 XTID:23 GDX:2 BDX:16 GDY:2 BDX:8
CT:472 BKX:1 BKY:1 TID: 8 YTID:14 XTID:24 GDX:2 BDX:16 GDY:2 BDX:8
CT:473 BKX:1 BKY:1 TID: 9 YTID:14 XTID:25 GDX:2 BDX:16 GDY:2 BDX:8
CT:474 BKX:1 BKY:1 TID:10 YTID:14 XTID:26 GDX:2 BDX:16 GDY:2 BDX:8
CT:475 BKX:1 BKY:1 TID:11 YTID:14 XTID:27 GDX:2 BDX:16 GDY:2 BDX:8
CT:476 BKX:1 BKY:1 TID:12 YTID:14 XTID:28 GDX:2 BDX:16 GDY:2 BDX:8
CT:477 BKX:1 BKY:1 TID:13 YTID:14 XTID:29 GDX:2 BDX:16 GDY:2 BDX:8
CT:478 BKX:1 BKY:1 TID:14 YTID:14 XTID:30 GDX:2 BDX:16 GDY:2 BDX:8
CT:479 BKX:1 BKY:1 TID:15 YTID:14 XTID:31 GDX:2 BDX:16 GDY:2 BDX:8
press any key to continue
CT:480 BKX:0 BKY:1 TID: 0 YTID:15 XTID: 0 GDX:2 BDX:16 GDY:2 BDX:8
CT:481 BKX:0 BKY:1 TID: 1 YTID:15 XTID: 1 GDX:2 BDX:16 GDY:2 BDX:8
CT:482 BKX:0 BKY:1 TID: 2 YTID:15 XTID: 2 GDX:2 BDX:16 GDY:2 BDX:8
CT:483 BKX:0 BKY:1 TID: 3 YTID:15 XTID: 3 GDX:2 BDX:16 GDY:2 BDX:8
CT:484 BKX:0 BKY:1 TID: 4 YTID:15 XTID: 4 GDX:2 BDX:16 GDY:2 BDX:8
CT:485 BKX:0 BKY:1 TID: 5 YTID:15 XTID: 5 GDX:2 BDX:16 GDY:2 BDX:8
CT:486 BKX:0 BKY:1 TID: 6 YTID:15 XTID: 6 GDX:2 BDX:16 GDY:2 BDX:8
CT:487 BKX:0 BKY:1 TID: 7 YTID:15 XTID: 7 GDX:2 BDX:16 GDY:2 BDX:8
CT:488 BKX:0 BKY:1 TID: 8 YTID:15 XTID: 8 GDX:2 BDX:16 GDY:2 BDX:8
CT:489 BKX:0 BKY:1 TID: 9 YTID:15 XTID: 9 GDX:2 BDX:16 GDY:2 BDX:8
CT:490 BKX:0 BKY:1 TID:10 YTID:15 XTID:10 GDX:2 BDX:16 GDY:2 BDX:8
CT:491 BKX:0 BKY:1 TID:11 YTID:15 XTID:11 GDX:2 BDX:16 GDY:2 BDX:8
CT:492 BKX:0 BKY:1 TID:12 YTID:15 XTID:12 GDX:2 BDX:16 GDY:2 BDX:8
CT:493 BKX:0 BKY:1 TID:13 YTID:15 XTID:13 GDX:2 BDX:16 GDY:2 BDX:8
CT:494 BKX:0 BKY:1 TID:14 YTID:15 XTID:14 GDX:2 BDX:16 GDY:2 BDX:8
CT:495 BKX:0 BKY:1 TID:15 YTID:15 XTID:15 GDX:2 BDX:16 GDY:2 BDX:8
CT:496 BKX:1 BKY:1 TID: 0 YTID:15 XTID:16 GDX:2 BDX:16 GDY:2 BDX:8
CT:497 BKX:1 BKY:1 TID: 1 YTID:15 XTID:17 GDX:2 BDX:16 GDY:2 BDX:8
CT:498 BKX:1 BKY:1 TID: 2 YTID:15 XTID:18 GDX:2 BDX:16 GDY:2 BDX:8
CT:499 BKX:1 BKY:1 TID: 3 YTID:15 XTID:19 GDX:2 BDX:16 GDY:2 BDX:8
CT:500 BKX:1 BKY:1 TID: 4 YTID:15 XTID:20 GDX:2 BDX:16 GDY:2 BDX:8
CT:501 BKX:1 BKY:1 TID: 5 YTID:15 XTID:21 GDX:2 BDX:16 GDY:2 BDX:8
CT:502 BKX:1 BKY:1 TID: 6 YTID:15 XTID:22 GDX:2 BDX:16 GDY:2 BDX:8
CT:503 BKX:1 BKY:1 TID: 7 YTID:15 XTID:23 GDX:2 BDX:16 GDY:2 BDX:8
CT:504 BKX:1 BKY:1 TID: 8 YTID:15 XTID:24 GDX:2 BDX:16 GDY:2 BDX:8
CT:505 BKX:1 BKY:1 TID: 9 YTID:15 XTID:25 GDX:2 BDX:16 GDY:2 BDX:8
CT:506 BKX:1 BKY:1 TID:10 YTID:15 XTID:26 GDX:2 BDX:16 GDY:2 BDX:8
CT:507 BKX:1 BKY:1 TID:11 YTID:15 XTID:27 GDX:2 BDX:16 GDY:2 BDX:8
CT:508 BKX:1 BKY:1 TID:12 YTID:15 XTID:28 GDX:2 BDX:16 GDY:2 BDX:8
CT:509 BKX:1 BKY:1 TID:13 YTID:15 XTID:29 GDX:2 BDX:16 GDY:2 BDX:8
CT:510 BKX:1 BKY:1 TID:14 YTID:15 XTID:30 GDX:2 BDX:16 GDY:2 BDX:8
CT:511 BKX:1 BKY:1 TID:15 YTID:15 XTID:31 GDX:2 BDX:16 GDY:2 BDX:8
press any key to continue

参考


阅读更多
想对作者说点什么?

博主推荐

换一批

没有更多推荐了,返回首页