cuda 入门 threadIdx

本文介绍了CUDA编程中如何利用grid和block的索引(从一维到三维)计算每个线程唯一的全局索引,并以此访问全局数组。以三维情形为例,设置dim3 block(2,2,2)和dim3 grid(2,2,2)后,每个线程通过threadIdx和blockIdx计算其唯一的全局索引gid,并用它访问输入数组。该方法展示了CUDA并行计算中数据访问的基本策略。
摘要由CSDN通过智能技术生成

一维 grid 一维 block 的唯一索引

/**
 * Prints, for every launched thread, its block index, its thread index, the
 * global index derived from them, and the element of `input` at that index.
 *
 * Launch layout: only the x components of threadIdx / blockIdx / blockDim are
 * used, so the computed index is unique only for a 1D grid of 1D blocks.
 *
 * Preconditions: there is no bounds check — `input[gid]` is read by every
 * thread, so `input` must hold at least gridDim.x * blockDim.x ints for a
 * 1D launch. `input` is dereferenced in device code, so it has to be
 * device-accessible memory.
 *
 * @param input  array of ints read (never written) by the kernel.
 */
__global__ void unique_gid_calculation(int * input)
{
	// Position of this thread inside its own block.
	const int lane = static_cast<int>(threadIdx.x);
	// Number of threads belonging to all blocks that precede this one.
	const int block_base = static_cast<int>(blockIdx.x * blockDim.x);
	const int gid = block_base + lane;

	// Device-side printf: debugging/teaching output only.
	printf("blockIdx.x : %d, threadIdx.x : %d, gid : %d, value : %d \n",
		blockIdx.x, lane, gid, input[gid]);
}

二维 grid 一维 block 的唯一索引

/**
 * Prints, for every launched thread, its 2D block index, its thread index,
 * the flattened global index, and the element of `data` at that index.
 *
 * Launch layout: uses blockIdx.x, blockIdx.y, gridDim.x, blockDim.x and
 * threadIdx.x only, i.e. the index is unique for a 2D grid of 1D blocks.
 * Blocks are numbered row by row: one grid row (fixed blockIdx.y) covers
 * blockDim.x * gridDim.x consecutive indices.
 *
 * Preconditions: there is no bounds check — `data[gid]` is read by every
 * thread, so `data` must hold at least
 * blockDim.x * gridDim.x * gridDim.y ints for such a launch.
 *
 * @param data  array of ints read (never written) by the kernel.
 */
__global__ void unique_gid_calculation_2d(int * data)
{
	// Position of this thread inside its own block.
	const int lane = static_cast<int>(threadIdx.x);

	// Threads in all complete grid rows above this block's row.
	const int row_base = static_cast<int>(blockDim.x * gridDim.x * blockIdx.y);

	// Threads in the blocks to the left of this block within the same row.
	const int block_base = static_cast<int>(blockIdx.x * blockDim.x);

	const int gid = row_base + block_base + lane;

	// Device-side printf: debugging/teaching output only.
	printf("blockIdx.x : %d, blockIdx.y : %d, threadIdx.x : %d, gid : %d - data : %d \n",
		blockIdx.x, blockIdx.y, lane, gid, data[gid]);
}

二维 grid 二维 block

/**
 * Prints, for every launched thread, its 2D block index, its flattened
 * in-block thread index, the flattened global index, and the element of
 * `data` at that index.
 *
 * Launch layout: uses the x and y components of threadIdx / blockIdx /
 * blockDim and gridDim.x, i.e. the index is unique for a 2D grid of 2D
 * blocks. Each block owns blockDim.x * blockDim.y consecutive indices;
 * blocks are ordered row by row (blockIdx.x fastest, then blockIdx.y).
 *
 * Preconditions: there is no bounds check — `data[gid]` is read by every
 * thread, so `data` must hold at least
 * blockDim.x * blockDim.y * gridDim.x * gridDim.y ints for such a launch.
 *
 * @param data  array of ints read (never written) by the kernel.
 */
__global__ void unique_gid_calculation_2d_2d(int * data)
{
	// Row-major flattening of the 2D thread index inside the block.
	int tid = blockDim.x * threadIdx.y + threadIdx.x;

	int num_threads_in_a_block = blockDim.x * blockDim.y;
	// Threads in the blocks to the left of this block within the same grid row.
	int block_offset = blockIdx.x * num_threads_in_a_block;

	int num_threads_in_a_row = num_threads_in_a_block * gridDim.x;
	// Threads in all complete grid rows above this block's row.
	int row_offset = num_threads_in_a_row * blockIdx.y;

	int gid = tid + block_offset + row_offset;

	// The third value is the flattened in-block index, not threadIdx.x alone,
	// so it is labelled "tid" (it differs from threadIdx.x whenever threadIdx.y > 0).
	printf("blockIdx.x : %d, blockIdx.y : %d, tid : %d, gid : %d - data : %d \n",
		blockIdx.x, blockIdx.y, tid, gid, data[gid]);
}

三维 grid 三维 block

/**
 * Prints, for every launched thread, its flattened in-block thread index,
 * the flattened global index, and the element of `input` at that index.
 *
 * Launch layout: uses all three components of threadIdx / blockIdx /
 * blockDim and gridDim.x / gridDim.y, so the index is unique for a 3D grid
 * of 3D blocks (x varies fastest, then y, then z, for both threads within a
 * block and blocks within the grid). Each block owns
 * blockDim.x * blockDim.y * blockDim.z consecutive indices.
 *
 * Preconditions: there is no bounds check — `input[gid]` is read by every
 * thread, so `input` must hold at least one int per launched thread.
 *
 * @param input  array of ints read (never written) by the kernel.
 */
__global__ void mem_trans(int* input)
{
	// Flattened index of this thread inside its block.
	int threadId_3D = threadIdx.x + threadIdx.y * blockDim.x +
		threadIdx.z * blockDim.x * blockDim.y;
	// Flattened index of this block inside the grid.
	int blockId_3D = blockIdx.x + blockIdx.y * gridDim.x +
		blockIdx.z * gridDim.x * gridDim.y;
	// Skip the threads of all preceding blocks, then add the in-block index.
	int gid = threadId_3D + (blockDim.x * blockDim.y * blockDim.z) * blockId_3D;
	// Report the flattened in-block index under "tid": threadIdx.x alone is
	// not unique within a 3D block and is not the value gid was built from.
	printf("tid : %d, gid : %d, value : %d\n",
		threadId_3D, gid, input[gid]);
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值