CUDA global_reduce实现
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <cuda_runtime.h>
__global__ void global_reduce(float* d_out, float* d_in)
{
int myID = blockIdx.x * blockDim.x + threadIdx.x;//得到该线程在全局的线程号
int
原创
2020-11-17 12:58:54 ·
249 阅读 ·
0 评论