关键是更新tid
tid += blockDim.x * gridDim.x; (加上网格中所有线程的总数,使当前线程接着处理下一个尚未处理的数组元素;该语句必须放在 while/for 循环中才会生效)
/**
 * Element-wise vector addition: d_crr[i] = d_arr[i] + d_brr[i] for every
 * i in [0, arrLength).
 *
 * Written as a grid-stride loop, so the result is correct for any 1D launch
 * configuration, including grids with fewer threads than array elements.
 * Only the x dimension of the grid and block is used for indexing.
 *
 * @param d_arr      first input array (at least arrLength elements)
 * @param d_brr      second input array (at least arrLength elements)
 * @param d_crr      output array (at least arrLength elements)
 * @param arrLength  number of elements to process; values <= 0 do nothing
 */
__global__ void add(int *d_arr, int *d_brr, int *d_crr, int arrLength) {
    // Total number of threads in the grid. Computed in 64 bits so that neither
    // the product nor the running index can wrap around for large arrLength.
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;

    // Start at this thread's global index and advance by the grid size until
    // the end of the array. A plain `if` here would process one element per
    // thread and silently skip the rest when the grid is smaller than the data.
    for (long long tid = static_cast<long long>(blockDim.x) * blockIdx.x + threadIdx.x;
         tid < arrLength;
         tid += stride) {
        d_crr[tid] = d_arr[tid] + d_brr[tid];
    }
}