cuda编程[1]:一二三维网格和块的核函数

目录

前言

所有的代码下载链接:code。以下代码展示了如何在 CUDA 中打印网格和线程的索引信息。代码包括一维、二维和三维的网格和块的设置,并定义了多个内核函数来输出当前的索引信息。

核函数

  1. 打印线程索引
// Prints one line per thread: the block index and the thread index,
// each written in (z, y, x) order.
__global__ void print_idx_kernel(){
    // The built-in index components are unsigned; copy them into ints so the
    // arguments match the %d conversions used below.
    const int bz = blockIdx.z,  by = blockIdx.y,  bx = blockIdx.x;
    const int tz = threadIdx.z, ty = threadIdx.y, tx = threadIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: (%3d, %3d, %3d)\n",
           bz, by, bx, tz, ty, tx);
}
  1. 打印网格和块的维度
// Prints one line per thread: the grid dimensions and the block dimensions
// of the current launch, each written in (z, y, x) order.
__global__ void print_dim_kernel(){
    // gridDim / blockDim components are unsigned; copy them into ints so the
    // arguments match the %d conversions used below.
    const int gz = gridDim.z,  gy = gridDim.y,  gx = gridDim.x;
    const int bz = blockDim.z, by = blockDim.y, bx = blockDim.x;

    printf("grid dimension: (%3d, %3d, %3d), block dimension: (%3d, %3d, %3d)\n",
           gz, gy, gx, bz, by, bx);
}
  1. 打印每个块的线程索引
// Prints one line per thread: the block index in (z, y, x) order and the
// thread's linear index inside its own block.
__global__ void print_thread_idx_per_block_kernel(){
    // Flatten (z, y, x) with x varying fastest:
    //   (z * dimY + y) * dimX + x  ==  z * dimX * dimY + y * dimX + x
    const int localId = (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x
                      + threadIdx.x;

    const int bz = blockIdx.z, by = blockIdx.y, bx = blockIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: %3d\n",
           bz, by, bx, localId);
}
  1. 打印每个网格的线程索引
// Prints one line per thread: the linear block index within the grid, the
// linear thread index within the block, and the resulting linear thread
// index within the whole grid.
__global__ void print_thread_idx_per_grid_kernel(){
    // Number of threads in one block.
    const int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;

    // Flatten the block coordinates (x varies fastest).
    const int blockId = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x
                      + blockIdx.x;

    // Flatten the thread coordinates inside the block (x varies fastest).
    const int localId = (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x
                      + threadIdx.x;

    // All threads of the preceding blocks come first, then this block's own.
    const int globalId = blockId * threadsPerBlock + localId;

    printf("block idx: %3d, thread idx in block: %3d, thread idx: %3d\n", 
           blockId, localId, globalId);
}
  1. 打印坐标
// Prints one line per thread: the block index in (z, y, x) order, the
// thread's linear index inside its block, and the thread's global
// coordinate in (x, y, z) order.
__global__ void print_cord_kernel(){
    // Linear index of this thread inside its block (x varies fastest).
    const int localId = (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x
                      + threadIdx.x;

    // Global coordinate along each axis: block offset plus offset in block.
    const int gx = threadIdx.x + blockIdx.x * blockDim.x;
    const int gy = threadIdx.y + blockIdx.y * blockDim.y;
    const int gz = threadIdx.z + blockIdx.z * blockDim.z;

    const int bz = blockIdx.z, by = blockIdx.y, bx = blockIdx.x;

    // Note the ordering: the block index is printed (z, y, x) while the
    // coordinate is printed (x, y, z).
    printf("block idx: (%3d, %3d, %3d), thread idx: %3d, cord: (%3d, %3d, %3d)\n",
           bz, by, bx, localId, gx, gy, gz);
}

一维

cudaDeviceSynchronize()(使 CPU 端与 GPU 端完成同步)不可以缺少:核函数的启动是异步的,主函数在 CPU 端调用核函数之后不会等待 GPU 全部执行完毕,而是立即继续往下执行。如果不加这个同步函数,程序可能在核函数的 printf 输出被刷新之前就已经退出,运行可执行文件时就会得到空的结果。

代码

// Launches a one-dimensional grid of one-dimensional blocks and lets every
// thread print its index information.
//
// The host first prints the launch configuration, then launches
// print_cord_kernel (swap in any other print_*_kernel defined in this file
// to inspect different information) and waits for the device so the
// device-side printf output is flushed before returning.
// Launch or execution errors are reported on stderr.
void print_one_dim() {
    int inputSize = 8;
    int threadsPerBlock = 4;
    // Ceil-divide so that a trailing partial block is still launched when
    // inputSize is not a multiple of threadsPerBlock.
    int numBlocks = (inputSize + threadsPerBlock - 1) / threadsPerBlock;

    dim3 block(threadsPerBlock);
    dim3 grid(numBlocks);

    // dim3 components are unsigned; cast to match the %d conversions.
    printf("grid dimension: %d, block dimension: %d,\n",
           (int)grid.x, (int)block.x);

    // Without a launch there is nothing for cudaDeviceSynchronize() to wait
    // on and no per-thread output is produced.
    print_cord_kernel<<<grid, block>>>();

    // A kernel launch returns no status itself; an invalid configuration is
    // reported through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "print_one_dim: kernel launch failed: %s\n",
                cudaGetErrorString(err));
        return;
    }

    // Block the host until the kernel finishes; this also flushes the
    // device-side printf buffer.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "print_one_dim: kernel execution failed: %s\n",
                cudaGetErrorString(err));
    }
}

二维

代码

// Launches a two-dimensional grid of two-dimensional blocks and lets every
// thread print its index information.
//
// The host first prints the launch configuration in (y, x) order, then
// launches print_cord_kernel (swap in any other print_*_kernel defined in
// this file to inspect different information) and waits for the device so
// the device-side printf output is flushed before returning.
// Launch or execution errors are reported on stderr.
void print_two_dim() {
    int inputWidth = 4;
    int threadsPerSide = 2;
    // Ceil-divide so that a trailing partial block is still launched when
    // inputWidth is not a multiple of threadsPerSide.
    int blocksPerSide = (inputWidth + threadsPerSide - 1) / threadsPerSide;

    dim3 block(threadsPerSide, threadsPerSide);
    dim3 grid(blocksPerSide, blocksPerSide);

    // dim3 components are unsigned; cast to match the %d conversions.
    printf("grid dimension: (%d, %d), block dimension: (%d, %d)\n",
           (int)grid.y, (int)grid.x, (int)block.y, (int)block.x);

    // Without a launch there is nothing for cudaDeviceSynchronize() to wait
    // on and no per-thread output is produced.
    print_cord_kernel<<<grid, block>>>();

    // A kernel launch returns no status itself; an invalid configuration is
    // reported through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "print_two_dim: kernel launch failed: %s\n",
                cudaGetErrorString(err));
        return;
    }

    // Block the host until the kernel finishes; this also flushes the
    // device-side printf buffer.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "print_two_dim: kernel execution failed: %s\n",
                cudaGetErrorString(err));
    }
}

三维打印

代码

// Launches a three-dimensional grid of three-dimensional blocks and lets
// every thread print its block index, in-block thread index and global
// coordinate.
//
// The grid is sized with ceil-division, so for a 3x3x3 input and 2x2x2
// blocks a 2x2x2 grid is launched and some threads have coordinates outside
// the input extent (print_cord_kernel prints them without a bounds check).
// The host prints the launch configuration in (z, y, x) order, launches the
// kernel, and waits for the device so the device-side printf output is
// flushed before returning. Launch or execution errors go to stderr.
void print_three_dim() {
    int depth = 3;
    int height = 3;
    int width = 3;

    int threadsPerSide = 2;

    dim3 block(threadsPerSide, threadsPerSide, threadsPerSide);
    // Round up along each axis so the grid covers the whole input.
    dim3 grid((width + threadsPerSide - 1) / threadsPerSide,
              (height + threadsPerSide - 1) / threadsPerSide,
              (depth + threadsPerSide - 1) / threadsPerSide);

    // dim3 components are unsigned; cast to match the %d conversions.
    printf("grid dimension: (%d, %d, %d), block dimension: (%d, %d, %d)\n",
           (int)grid.z, (int)grid.y, (int)grid.x,
           (int)block.z, (int)block.y, (int)block.x);

    // Without a launch there is nothing for cudaDeviceSynchronize() to wait
    // on and no per-thread output is produced.
    print_cord_kernel<<<grid, block>>>();

    // A kernel launch returns no status itself; an invalid configuration is
    // reported through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "print_three_dim: kernel launch failed: %s\n",
                cudaGetErrorString(err));
        return;
    }

    // Block the host until the kernel finishes; this also flushes the
    // device-side printf buffer.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "print_three_dim: kernel execution failed: %s\n",
                cudaGetErrorString(err));
    }
}

主函数

这里就可以自己来使用print_one_dim, print_two_dim, print_three_dim测试不同网格和块的情况。可以自行组合定义核函数来测试所有情况。

// Program entry point: runs exactly one of the demo drivers.
int main() {
    // Enable the driver for the dimensionality you want to inspect and
    // comment out the others.
    //   print_one_dim();
    //   print_two_dim();
    print_three_dim();

    return 0;
}

结果分析

这个只是一个小的.cu代码,所以我直接使用了笔记(点击代码链接可以看到)中的第一种方法编译。
在这里插入图片描述
打开当前代码目录下运行

nvcc grid_block_123D.cu -o test
./test

得到结果
```txt
grid dimension: (2, 2, 2), block dimension: (2, 2, 2)
block idx: ( 1, 0, 1), thread idx: 0, cord: ( 2, 0, 2)
block idx: ( 1, 0, 1), thread idx: 1, cord: ( 3, 0, 2)
block idx: ( 1, 0, 1), thread idx: 2, cord: ( 2, 1, 2)
block idx: ( 1, 0, 1), thread idx: 3, cord: ( 3, 1, 2)
block idx: ( 1, 0, 1), thread idx: 4, cord: ( 2, 0, 3)
block idx: ( 1, 0, 1), thread idx: 5, cord: ( 3, 0, 3)
block idx: ( 1, 0, 1), thread idx: 6, cord: ( 2, 1, 3)
block idx: ( 1, 0, 1), thread idx: 7, cord: ( 3, 1, 3)
block idx: ( 0, 1, 0), thread idx: 0, cord: ( 0, 2, 0)
block idx: ( 0, 1, 0), thread idx: 1, cord: ( 1, 2, 0)
block idx: ( 0, 1, 0), thread idx: 2, cord: ( 0, 3, 0)
block idx: ( 0, 1, 0), thread idx: 3, cord: ( 1, 3, 0)
block idx: ( 0, 1, 0), thread idx: 4, cord: ( 0, 2, 1)
block idx: ( 0, 1, 0), thread idx: 5, cord: ( 1, 2, 1)
block idx: ( 0, 1, 0), thread idx: 6, cord: ( 0, 3, 1)
block idx: ( 0, 1, 0), thread idx: 7, cord: ( 1, 3, 1)
block idx: ( 1, 0, 0), thread idx: 0, cord: ( 0, 0, 2)
block idx: ( 1, 0, 0), thread idx: 1, cord: ( 1, 0, 2)
block idx: ( 1, 0, 0), thread idx: 2, cord: ( 0, 1, 2)
block idx: ( 1, 0, 0), thread idx: 3, cord: ( 1, 1, 2)
block idx: ( 1, 0, 0), thread idx: 4, cord: ( 0, 0, 3)
block idx: ( 1, 0, 0), thread idx: 5, cord: ( 1, 0, 3)
block idx: ( 1, 0, 0), thread idx: 6, cord: ( 0, 1, 3)
block idx: ( 1, 0, 0), thread idx: 7, cord: ( 1, 1, 3)
block idx: ( 0, 0, 1), thread idx: 0, cord: ( 2, 0, 0)
block idx: ( 0, 0, 1), thread idx: 1, cord: ( 3, 0, 0)
block idx: ( 0, 0, 1), thread idx: 2, cord: ( 2, 1, 0)
block idx: ( 0, 0, 1), thread idx: 3, cord: ( 3, 1, 0)
block idx: ( 0, 0, 1), thread idx: 4, cord: ( 2, 0, 1)
block idx: ( 0, 0, 1), thread idx: 5, cord: ( 3, 0, 1)
block idx: ( 0, 0, 1), thread idx: 6, cord: ( 2, 1, 1)
block idx: ( 0, 0, 1), thread idx: 7, cord: ( 3, 1, 1)
block idx: ( 1, 1, 1), thread idx: 0, cord: ( 2, 2, 2)
block idx: ( 1, 1, 1), thread idx: 1, cord: ( 3, 2, 2)
block idx: ( 1, 1, 1), thread idx: 2, cord: ( 2, 3, 2)
block idx: ( 1, 1, 1), thread idx: 3, cord: ( 3, 3, 2)
block idx: ( 1, 1, 1), thread idx: 4, cord: ( 2, 2, 3)
block idx: ( 1, 1, 1), thread idx: 5, cord: ( 3, 2, 3)
block idx: ( 1, 1, 1), thread idx: 6, cord: ( 2, 3, 3)
block idx: ( 1, 1, 1), thread idx: 7, cord: ( 3, 3, 3)
block idx: ( 0, 1, 1), thread idx: 0, cord: ( 2, 2, 0)
block idx: ( 0, 1, 1), thread idx: 1, cord: ( 3, 2, 0)
block idx: ( 0, 1, 1), thread idx: 2, cord: ( 2, 3, 0)
block idx: ( 0, 1, 1), thread idx: 3, cord: ( 3, 3, 0)
block idx: ( 0, 1, 1), thread idx: 4, cord: ( 2, 2, 1)
block idx: ( 0, 1, 1), thread idx: 5, cord: ( 3, 2, 1)
block idx: ( 0, 1, 1), thread idx: 6, cord: ( 2, 3, 1)
block idx: ( 0, 1, 1), thread idx: 7, cord: ( 3, 3, 1)
block idx: ( 0, 0, 0), thread idx: 0, cord: ( 0, 0, 0)
block idx: ( 0, 0, 0), thread idx: 1, cord: ( 1, 0, 0)
block idx: ( 0, 0, 0), thread idx: 2, cord: ( 0, 1, 0)
block idx: ( 0, 0, 0), thread idx: 3, cord: ( 1, 1, 0)
block idx: ( 0, 0, 0), thread idx: 4, cord: ( 0, 0, 1)
block idx: ( 0, 0, 0), thread idx: 5, cord: ( 1, 0, 1)
block idx: ( 0, 0, 0), thread idx: 6, cord: ( 0, 1, 1)
block idx: ( 0, 0, 0), thread idx: 7, cord: ( 1, 1, 1)
block idx: ( 1, 1, 0), thread idx: 0, cord: ( 0, 2, 2)
block idx: ( 1, 1, 0), thread idx: 1, cord: ( 1, 2, 2)
block idx: ( 1, 1, 0), thread idx: 2, cord: ( 0, 3, 2)
block idx: ( 1, 1, 0), thread idx: 3, cord: ( 1, 3, 2)
block idx: ( 1, 1, 0), thread idx: 4, cord: ( 0, 2, 3)
block idx: ( 1, 1, 0), thread idx: 5, cord: ( 1, 2, 3)
block idx: ( 1, 1, 0), thread idx: 6, cord: ( 0, 3, 3)
block idx: ( 1, 1, 0), thread idx: 7, cord: ( 1, 3, 3)

  • 10
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小马敲马

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值