CUDA user-defined dim3

4人阅读 评论(0) 收藏 举报
分类:
#include<stdio.h>
#include<stdlib.h>
#include<conio.h>

// Const pointer to a writable unsigned int: the pointer cannot be reseated,
// but the values it points at can be stored to (used for kernel outputs).
typedef unsigned int * const uipc;
// Read-only unsigned int (used for values computed once per thread).
typedef const unsigned int cui;
// Shorthand for unsigned int.
typedef unsigned int ui;

/*
 * Diagnostic kernel for 2D launches: every thread writes the built-in
 * index/dimension values it observes into one slot of each output array.
 *
 * The slot is the thread's row-major position in the whole grid:
 *   slot = global_row * (gridDim.x * blockDim.x) + global_col
 *
 * Outputs (all indexed by slot):
 *   block_x, block_y       - blockIdx.x, blockIdx.y
 *   thread                 - threadIdx.x (x component only)
 *   calc_thread            - the slot value itself
 *   x_thread, y_thread     - global column, global row
 *   grid_dimx, grid_dimy   - gridDim.x, gridDim.y
 *   block_dimx, block_dimy - blockDim.x, blockDim.y
 *
 * There is no bounds check: each array must hold at least
 * gridDim.x*blockDim.x * gridDim.y*blockDim.y elements.
 */
__global__ void what_is_my_id_2d_A(
    uipc block_x,
    uipc block_y,
    uipc thread,
    uipc calc_thread,
    uipc x_thread,
    uipc y_thread,
    uipc grid_dimx,
    uipc block_dimx,
    uipc grid_dimy,
    uipc block_dimy
)
{
    // Position of this thread measured across the entire grid.
    cui global_col = threadIdx.x + (blockDim.x * blockIdx.x);
    cui global_row = threadIdx.y + (blockDim.y * blockIdx.y);

    // Number of threads in one full row of the grid.
    cui threads_per_row = blockDim.x * gridDim.x;

    // Row-major slot shared by all ten output arrays.
    cui slot = (global_row * threads_per_row) + global_col;

    // Launch geometry as seen by this thread.
    grid_dimx[slot]  = gridDim.x;
    grid_dimy[slot]  = gridDim.y;
    block_dimx[slot] = blockDim.x;
    block_dimy[slot] = blockDim.y;

    // Which block, and which x-lane inside it.
    block_x[slot] = blockIdx.x;
    block_y[slot] = blockIdx.y;
    thread[slot]  = threadIdx.x;

    // Derived global coordinates and the flattened index.
    x_thread[slot]    = global_col;
    y_thread[slot]    = global_row;
    calc_thread[slot] = slot;
}

// Dimensions of every result array: ARRAY_SIZE_Y rows of ARRAY_SIZE_X columns.
#define ARRAY_SIZE_X 32
#define ARRAY_SIZE_Y 16
// Byte size of one result array; used for each device allocation and copy.
#define ARRAY_SIZE_IN_BYTES ((ARRAY_SIZE_X)*(ARRAY_SIZE_Y)*(sizeof(unsigned int)))

// Host-side destinations for the values the kernel records, one array per
// recorded quantity, indexed as [row][column].
ui cpu_block_x[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_block_y[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
// NOTE(review): cpu_warp is not referenced by the code visible in this file.
ui cpu_warp[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_calc_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_xthread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_ythread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_grid_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_block_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_grid_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];
ui cpu_block_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];


// Print a diagnostic and terminate if a CUDA runtime call did not succeed.
static void check_cuda(cudaError_t status, const char *expr, int line)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error at line %d: %s failed: %s\n",
                line, expr, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Wraps a CUDA runtime call so its return code is never silently dropped.
#define CHECK_CUDA(call) check_cuda((call), #call, __LINE__)

/*
 * Launches what_is_my_id_2d_A with two different grid/block shapes that both
 * cover ARRAY_SIZE_X x ARRAY_SIZE_Y threads, copies the recorded values back
 * to the host arrays and prints them one thread per line.
 *
 * Output is paced with getch(): one key press after every printed line and
 * one more after the "press any key to continue" prompt ending each row.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any CUDA error or if a
 * launch shape does not match the buffer dimensions.
 */
int main(){

    //the following x : width ; the following y : height;
    //block x:32 y:4
    const dim3 threads_rect(32,4);
    //grid x:1 y:4
    const dim3 blocks_rect(1,4);

    //block x:16 y:8
    const dim3 threads_square(16,8);
    //grid x:2 y:2
    const dim3 blocks_square(2,2);

    // One entry per kernel run; index k pairs grid_dims[k] with block_dims[k].
    const dim3 grid_dims[]  = { blocks_rect,  blocks_square  };
    const dim3 block_dims[] = { threads_rect, threads_square };
    const int kernel_count = (int)(sizeof(grid_dims) / sizeof(grid_dims[0]));

    ui * gpu_block_x = NULL;
    ui * gpu_block_y = NULL;
    ui * gpu_thread = NULL;
    ui * gpu_calc_thread = NULL;
    ui * gpu_xthread = NULL;
    ui * gpu_ythread = NULL;
    ui * gpu_grid_dimx = NULL;
    ui * gpu_block_dimx = NULL;
    ui * gpu_grid_dimy = NULL;
    ui * gpu_block_dimy = NULL;

    // Pairs each device buffer with the host array that receives its contents,
    // so allocation, copy-back and release are each a single loop.
    struct buffer_pair { ui **gpu; ui *cpu; };
    const buffer_pair buffers[] = {
        { &gpu_block_x,     &cpu_block_x[0][0]     },
        { &gpu_block_y,     &cpu_block_y[0][0]     },
        { &gpu_thread,      &cpu_thread[0][0]      },
        { &gpu_calc_thread, &cpu_calc_thread[0][0] },
        { &gpu_xthread,     &cpu_xthread[0][0]     },
        { &gpu_ythread,     &cpu_ythread[0][0]     },
        { &gpu_grid_dimx,   &cpu_grid_dimx[0][0]   },
        { &gpu_grid_dimy,   &cpu_grid_dimy[0][0]   },
        { &gpu_block_dimx,  &cpu_block_dimx[0][0]  },
        { &gpu_block_dimy,  &cpu_block_dimy[0][0]  }
    };
    const int buffer_count = (int)(sizeof(buffers) / sizeof(buffers[0]));

    //allocate gpu memory
    for(int b = 0;b < buffer_count;b++){
        CHECK_CUDA(cudaMalloc((void**)buffers[b].gpu,ARRAY_SIZE_IN_BYTES));
    }

    for(int kernel = 0;kernel < kernel_count;kernel++){
        const dim3 grid = grid_dims[kernel];
        const dim3 block = block_dims[kernel];

        // The kernel has no bounds check, so the launch must cover exactly
        // the ARRAY_SIZE_X x ARRAY_SIZE_Y elements each buffer holds.
        if(grid.x*block.x != (unsigned int)ARRAY_SIZE_X ||
           grid.y*block.y != (unsigned int)ARRAY_SIZE_Y){
            fprintf(stderr,"kernel %d: launch covers %ux%u threads, expected %dx%d\n",
                    kernel,grid.x*block.x,grid.y*block.y,ARRAY_SIZE_X,ARRAY_SIZE_Y);
            exit(EXIT_FAILURE);
        }

        what_is_my_id_2d_A<<<grid,block>>>(gpu_block_x,
                gpu_block_y,
                gpu_thread,
                gpu_calc_thread,
                gpu_xthread,
                gpu_ythread,
                gpu_grid_dimx,
                gpu_block_dimx,
                gpu_grid_dimy,
                gpu_block_dimy
                );
        // A launch returns no status: query it, then wait so that execution
        // errors are reported here rather than at a later unrelated call.
        CHECK_CUDA(cudaGetLastError());
        CHECK_CUDA(cudaDeviceSynchronize());

        //copy result from gpu memory to cpu memory
        for(int b = 0;b < buffer_count;b++){
            CHECK_CUDA(cudaMemcpy(buffers[b].cpu,*buffers[b].gpu,
                                  ARRAY_SIZE_IN_BYTES,cudaMemcpyDeviceToHost));
        }

        printf("\nkernel %d\n",kernel);

        for(int y=0;y<ARRAY_SIZE_Y;y++){
            for(int x=0;x<ARRAY_SIZE_X;x++){
                // The final column is the block's y dimension, hence "BDY".
                printf("CT:%2u BKX:%1u BKY:%1u TID:%2u YTID:%2u XTID:%2u GDX:%1u BDX:%1u GDY:%1u BDY:%1u\n",
                    cpu_calc_thread[y][x],
                    cpu_block_x[y][x],
                    cpu_block_y[y][x],
                    cpu_thread[y][x],
                    cpu_ythread[y][x],
                    cpu_xthread[y][x],
                    cpu_grid_dimx[y][x],
                    cpu_block_dimx[y][x],
                    cpu_grid_dimy[y][x],
                    cpu_block_dimy[y][x]
                    );
                // Pause after every line; the key that was pressed is not used.
                getch();
            }
            printf("press any key to continue\n");
            getch();
        }

    }

    for(int b = 0;b < buffer_count;b++){
        CHECK_CUDA(cudaFree(*buffers[b].gpu));
    }
    return 0;

}


kernel 0
CT: 0 BKX:0 BKY:0 TID: 0 YTID: 0 XTID: 0 GDX:1 BDX:32 GDY:4 BDX:4
CT: 1 BKX:0 BKY:0 TID: 1 YTID: 0 XTID: 1 GDX:1 BDX:32 GDY:4 BDX:4
CT: 2 BKX:0 BKY:0 TID: 2 YTID: 0 XTID: 2 GDX:1 BDX:32 GDY:4 BDX:4
CT: 3 BKX:0 BKY:0 TID: 3 YTID: 0 XTID: 3 GDX:1 BDX:32 GDY:4 BDX:4
CT: 4 BKX:0 BKY:0 TID: 4 YTID: 0 XTID: 4 GDX:1 BDX:32 GDY:4 BDX:4
CT: 5 BKX:0 BKY:0 TID: 5 YTID: 0 XTID: 5 GDX:1 BDX:32 GDY:4 BDX:4
CT: 6 BKX:0 BKY:0 TID: 6 YTID: 0 XTID: 6 GDX:1 BDX:32 GDY:4 BDX:4
CT: 7 BKX:0 BKY:0 TID: 7 YTID: 0 XTID: 7 GDX:1 BDX:32 GDY:4 BDX:4
CT: 8 BKX:0 BKY:0 TID: 8 YTID: 0 XTID: 8 GDX:1 BDX:32 GDY:4 BDX:4
CT: 9 BKX:0 BKY:0 TID: 9 YTID: 0 XTID: 9 GDX:1 BDX:32 GDY:4 BDX:4
CT:10 BKX:0 BKY:0 TID:10 YTID: 0 XTID:10 GDX:1 BDX:32 GDY:4 BDX:4
CT:11 BKX:0 BKY:0 TID:11 YTID: 0 XTID:11 GDX:1 BDX:32 GDY:4 BDX:4
CT:12 BKX:0 BKY:0 TID:12 YTID: 0 XTID:12 GDX:1 BDX:32 GDY:4 BDX:4
CT:13 BKX:0 BKY:0 TID:13 YTID: 0 XTID:13 GDX:1 BDX:32 GDY:4 BDX:4
CT:14 BKX:0 BKY:0 TID:14 YTID: 0 XTID:14 GDX:1 BDX:32 GDY:4 BDX:4
CT:15 BKX:0 BKY:0 TID:15 YTID: 0 XTID:15 GDX:1 BDX:32 GDY:4 BDX:4
CT:16 BKX:0 BKY:0 TID:16 YTID: 0 XTID:16 GDX:1 BDX:32 GDY:4 BDX:4
CT:17 BKX:0 BKY:0 TID:17 YTID: 0 XTID:17 GDX:1 BDX:32 GDY:4 BDX:4
CT:18 BKX:0 BKY:0 TID:18 YTID: 0 XTID:18 GDX:1 BDX:32 GDY:4 BDX:4
CT:19 BKX:0 BKY:0 TID:19 YTID: 0 XTID:19 GDX:1 BDX:32 GDY:4 BDX:4
CT:20 BKX:0 BKY:0 TID:20 YTID: 0 XTID:20 GDX:1 BDX:32 GDY:4 BDX:4
CT:21 BKX:0 BKY:0 TID:21 YTID: 0 XTID:21 GDX:1 BDX:32 GDY:4 BDX:4
CT:22 BKX:0 BKY:0 TID:22 YTID: 0 XTID:22 GDX:1 BDX:32 GDY:4 BDX:4
CT:23 BKX:0 BKY:0 TID:23 YTID: 0 XTID:23 GDX:1 BDX:32 GDY:4 BDX:4
CT:24 BKX:0 BKY:0 TID:24 YTID: 0 XTID:24 GDX:1 BDX:32 GDY:4 BDX:4
CT:25 BKX:0 BKY:0 TID:25 YTID: 0 XTID:25 GDX:1 BDX:32 GDY:4 BDX:4
CT:26 BKX:0 BKY:0 TID:26 YTID: 0 XTID:26 GDX:1 BDX:32 GDY:4 BDX:4
CT:27 BKX:0 BKY:0 TID:27 YTID: 0 XTID:27 GDX:1 BDX:32 GDY:4 BDX:4
CT:28 BKX:0 BKY:0 TID:28 YTID: 0 XTID:28 GDX:1 BDX:32 GDY:4 BDX:4
CT:29 BKX:0 BKY:0 TID:29 YTID: 0 XTID:29 GDX:1 BDX:32 GDY:4 BDX:4
CT:30 BKX:0 BKY:0 TID:30 YTID: 0 XTID:30 GDX:1 BDX:32 GDY:4 BDX:4
CT:31 BKX:0 BKY:0 TID:31 YTID: 0 XTID:31 GDX:1 BDX:32 GDY:4 BDX:4
press any key to continue
CT:32 BKX:0 BKY:0 TID: 0 YTID: 1 XTID: 0 GDX:1 BDX:32 GDY:4 BDX:4
CT:33 BKX:0 BKY:0 TID: 1 YTID: 1 XTID: 1 GDX:1 BDX:32 GDY:4 BDX:4

......................

CT:470 BKX:1 BKY:1 TID: 6 YTID:14 XTID:22 GDX:2 BDX:16 GDY:2 BDX:8
CT:471 BKX:1 BKY:1 TID: 7 YTID:14 XTID:23 GDX:2 BDX:16 GDY:2 BDX:8
CT:472 BKX:1 BKY:1 TID: 8 YTID:14 XTID:24 GDX:2 BDX:16 GDY:2 BDX:8
CT:473 BKX:1 BKY:1 TID: 9 YTID:14 XTID:25 GDX:2 BDX:16 GDY:2 BDX:8
CT:474 BKX:1 BKY:1 TID:10 YTID:14 XTID:26 GDX:2 BDX:16 GDY:2 BDX:8
CT:475 BKX:1 BKY:1 TID:11 YTID:14 XTID:27 GDX:2 BDX:16 GDY:2 BDX:8
CT:476 BKX:1 BKY:1 TID:12 YTID:14 XTID:28 GDX:2 BDX:16 GDY:2 BDX:8
CT:477 BKX:1 BKY:1 TID:13 YTID:14 XTID:29 GDX:2 BDX:16 GDY:2 BDX:8
CT:478 BKX:1 BKY:1 TID:14 YTID:14 XTID:30 GDX:2 BDX:16 GDY:2 BDX:8
CT:479 BKX:1 BKY:1 TID:15 YTID:14 XTID:31 GDX:2 BDX:16 GDY:2 BDX:8
press any key to continue
CT:480 BKX:0 BKY:1 TID: 0 YTID:15 XTID: 0 GDX:2 BDX:16 GDY:2 BDX:8
CT:481 BKX:0 BKY:1 TID: 1 YTID:15 XTID: 1 GDX:2 BDX:16 GDY:2 BDX:8
CT:482 BKX:0 BKY:1 TID: 2 YTID:15 XTID: 2 GDX:2 BDX:16 GDY:2 BDX:8
CT:483 BKX:0 BKY:1 TID: 3 YTID:15 XTID: 3 GDX:2 BDX:16 GDY:2 BDX:8
CT:484 BKX:0 BKY:1 TID: 4 YTID:15 XTID: 4 GDX:2 BDX:16 GDY:2 BDX:8
CT:485 BKX:0 BKY:1 TID: 5 YTID:15 XTID: 5 GDX:2 BDX:16 GDY:2 BDX:8
CT:486 BKX:0 BKY:1 TID: 6 YTID:15 XTID: 6 GDX:2 BDX:16 GDY:2 BDX:8
CT:487 BKX:0 BKY:1 TID: 7 YTID:15 XTID: 7 GDX:2 BDX:16 GDY:2 BDX:8
CT:488 BKX:0 BKY:1 TID: 8 YTID:15 XTID: 8 GDX:2 BDX:16 GDY:2 BDX:8
CT:489 BKX:0 BKY:1 TID: 9 YTID:15 XTID: 9 GDX:2 BDX:16 GDY:2 BDX:8
CT:490 BKX:0 BKY:1 TID:10 YTID:15 XTID:10 GDX:2 BDX:16 GDY:2 BDX:8
CT:491 BKX:0 BKY:1 TID:11 YTID:15 XTID:11 GDX:2 BDX:16 GDY:2 BDX:8
CT:492 BKX:0 BKY:1 TID:12 YTID:15 XTID:12 GDX:2 BDX:16 GDY:2 BDX:8
CT:493 BKX:0 BKY:1 TID:13 YTID:15 XTID:13 GDX:2 BDX:16 GDY:2 BDX:8
CT:494 BKX:0 BKY:1 TID:14 YTID:15 XTID:14 GDX:2 BDX:16 GDY:2 BDX:8
CT:495 BKX:0 BKY:1 TID:15 YTID:15 XTID:15 GDX:2 BDX:16 GDY:2 BDX:8
CT:496 BKX:1 BKY:1 TID: 0 YTID:15 XTID:16 GDX:2 BDX:16 GDY:2 BDX:8
CT:497 BKX:1 BKY:1 TID: 1 YTID:15 XTID:17 GDX:2 BDX:16 GDY:2 BDX:8
CT:498 BKX:1 BKY:1 TID: 2 YTID:15 XTID:18 GDX:2 BDX:16 GDY:2 BDX:8
CT:499 BKX:1 BKY:1 TID: 3 YTID:15 XTID:19 GDX:2 BDX:16 GDY:2 BDX:8
CT:500 BKX:1 BKY:1 TID: 4 YTID:15 XTID:20 GDX:2 BDX:16 GDY:2 BDX:8
CT:501 BKX:1 BKY:1 TID: 5 YTID:15 XTID:21 GDX:2 BDX:16 GDY:2 BDX:8
CT:502 BKX:1 BKY:1 TID: 6 YTID:15 XTID:22 GDX:2 BDX:16 GDY:2 BDX:8
CT:503 BKX:1 BKY:1 TID: 7 YTID:15 XTID:23 GDX:2 BDX:16 GDY:2 BDX:8
CT:504 BKX:1 BKY:1 TID: 8 YTID:15 XTID:24 GDX:2 BDX:16 GDY:2 BDX:8
CT:505 BKX:1 BKY:1 TID: 9 YTID:15 XTID:25 GDX:2 BDX:16 GDY:2 BDX:8
CT:506 BKX:1 BKY:1 TID:10 YTID:15 XTID:26 GDX:2 BDX:16 GDY:2 BDX:8
CT:507 BKX:1 BKY:1 TID:11 YTID:15 XTID:27 GDX:2 BDX:16 GDY:2 BDX:8
CT:508 BKX:1 BKY:1 TID:12 YTID:15 XTID:28 GDX:2 BDX:16 GDY:2 BDX:8
CT:509 BKX:1 BKY:1 TID:13 YTID:15 XTID:29 GDX:2 BDX:16 GDY:2 BDX:8
CT:510 BKX:1 BKY:1 TID:14 YTID:15 XTID:30 GDX:2 BDX:16 GDY:2 BDX:8
CT:511 BKX:1 BKY:1 TID:15 YTID:15 XTID:31 GDX:2 BDX:16 GDY:2 BDX:8
press any key to continue

参考


查看评论

CUDA笔记2:概念理解

CUDA基本概念: CUDA全称是Compute Unified Device Architecture,中文名称即统一计算设备架构,它是NVIDIA公司提出的一种通用的并行计算平台和编程模型。使用...
  • Ai_ViVi
  • Ai_ViVi
  • 2014-12-29 11:08:05
  • 2068

cuda 初学大全

cuda 初学大全 原文:http://hi.baidu.com/coolrainbow/item/de05efc83151671a50505878 1 硬件架构CUDA编程中,习惯称CPU为Host...
  • Augusdi
  • Augusdi
  • 2013-10-09 23:30:29
  • 8478

cuda之thread,block,grid详解

本文将通过一个程序帮助了解线程块的分配,以及线程束,线程全局标号等 #include #include #include #include #include #define ARRAY_SI...
  • xuhang0910
  • xuhang0910
  • 2015-08-25 16:50:12
  • 1229

(Cuda)基础知识(一)

本文地址http://blog.csdn.net/mounty_fsc/article/details/51092920 本部分内容为[1]CUDA_C_Programming_Guide.pdf中笔...
  • mounty_fsc
  • mounty_fsc
  • 2016-05-02 00:11:27
  • 2568

CUDA编程入门极简教程

码字不易,欢迎给个赞! 欢迎交流与转载,文章会同步发布在公众号:机器学习算法全栈工程师(Jeemy110) 目录 目录 前言 CUDA编程模型基础 向量加法实例 矩阵乘法实例 小结 ...
  • xiaohu2022
  • xiaohu2022
  • 2018-03-18 13:29:44
  • 163

CUDA编程(三):线程模型

一.CUDA线程模型概览首先要搞清楚的就是线程网格(grid),线程块(block)和线程(thread)之间的关系. 在前面的文章里面就已经看到了核函数kernel但是并不知道这个核函数启动的...
  • xierhacker
  • xierhacker
  • 2016-10-06 20:47:22
  • 1033

dimGrid和blockDim变量

cuda中kernel的启动 //设置对应的执行配置参数,dim3类型的struct变量 dim3 dimBlock(Width,Width);//描述块的配置 dim3 dimGrid(1,1...
  • ZIV555
  • ZIV555
  • 2016-05-19 16:43:18
  • 967

CUDA学习(6)Kernel的加载-threadIdx

刚开始学习CUDA的时候,对kernel加载的计算idx一直很模糊,threadIdx.x,blockx.x,blockDim,gridDim等一直分不清。经过查阅各方资料,特在此做个整理,表述一下个...
  • Mao_Jonah
  • Mao_Jonah
  • 2018-01-05 11:03:58
  • 40

cuda中变量的属性

在device亚程序中声明的变量可以有以下几种属性(device,managed, constant, shared和pinned):   Device数据 储存在device全...
  • Virtual_Func
  • Virtual_Func
  • 2015-10-17 15:19:31
  • 568

CUDA学习,第一个kernel函数及代码讲解

CUDA学习,第一个kernel函数及代码讲解。本博文分为三个部分,第一部分给出一个代码示例,第二部分对代码进行讲解,第三部分根据这个例子介绍如何部署和发起一个kernel函数。...
  • helei001
  • helei001
  • 2014-05-13 22:29:41
  • 3384
    个人资料
    持之以恒
    等级:
    访问量: 23万+
    积分: 6097
    排名: 5131
    最新评论