/* Introduction to CUDA streams: copy data to the GPU in chunks and overlap
   the transfers with kernel execution. */
#include <stdio.h>
#include <stdlib.h> // rand() is used below; declare it explicitly
// Parenthesize object-like macros so arithmetic expansion (e.g. N/256, M*sizeof(int))
// groups as intended regardless of surrounding operators.
#define N (1024*1024) // size of each chunk transferred from CPU to GPU
#define M (N*10)      // total amount of data held on the CPU
/*测试设备是否支持边执行核函数边复制数据*/
/* Returns true when the currently selected device reports the
   deviceOverlap capability, i.e. it can execute a kernel while
   simultaneously performing a host<->device copy. */
bool support_overlap(){
    int dev;
    cudaGetDevice(&dev);
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, dev);
    return prop.deviceOverlap != 0;
}
/* Element-wise sum of one N-element chunk: c[i] = a[i] + b[i].
   Expects a 1-D launch; threads whose global index falls past N exit early. */
__global__ void add(int* a,int* b,int* c){
    int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= N) return;
    c[idx] = a[idx] + b[idx];
}
int main(){
    cudaEvent_t start, stop;
    float elapsedTime;
    cudaStream_t stream;                // stream used to overlap copies with kernel work
    int *a, *b, *c, *d_a, *d_b, *d_c;

    /* BUG FIX: the original wrote `!support_overlap` (the function POINTER,
       never null), so the capability check was dead code. Call the function. */
    if (!support_overlap()) {
        printf("Sorry,the device cannot support overlap.\n");
        return 0;
    }

    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start, 0);
    cudaStreamCreate(&stream);          // initialize the stream

    /* Device buffers hold one N-element chunk at a time. */
    cudaMalloc((void**)&d_a, N * sizeof(int));
    cudaMalloc((void**)&d_b, N * sizeof(int));
    cudaMalloc((void**)&d_c, N * sizeof(int));

    /* Allocate page-locked (pinned) host memory — required for truly
       asynchronous cudaMemcpyAsync transfers. */
    cudaHostAlloc((void**)&a, M * sizeof(int), cudaHostAllocDefault);
    cudaHostAlloc((void**)&b, M * sizeof(int), cudaHostAllocDefault);
    cudaHostAlloc((void**)&c, M * sizeof(int), cudaHostAllocDefault);

    /* Fill host memory with random values. */
    for (int i = 0; i < M; i++) {
        a[i] = rand();
        b[i] = rand();
    }

    /* Split the input into N-element chunks; for each chunk enqueue
       "copy to GPU" -> "compute" -> "copy back to CPU" on the stream. */
    for (int i = 0; i < M; i += N) {
        // Pinned host memory is copied to the device asynchronously.
        cudaMemcpyAsync(d_a, a + i, N * sizeof(int), cudaMemcpyHostToDevice, stream);
        cudaMemcpyAsync(d_b, b + i, N * sizeof(int), cudaMemcpyHostToDevice, stream);
        add<<<N / 256, 256, 0, stream>>>(d_a, d_b, d_c);
        /* BUG FIX: the original copied only sizeof(int) (4 bytes) back,
           discarding almost all kernel results. Copy the whole chunk. */
        cudaMemcpyAsync(c + i, d_c, N * sizeof(int), cudaMemcpyDeviceToHost, stream);
    }
    cudaStreamSynchronize(stream);      // wait until all queued work has finished

    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsedTime, start, stop);
    printf("%f", elapsedTime);

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    cudaFreeHost(a);
    cudaFreeHost(b);
    /* BUG FIX: the original freed `b` twice (double free) and leaked `c`. */
    cudaFreeHost(c);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
    cudaStreamDestroy(stream);
    return 0;
}
// cuda — using streams
// (Blog-post footer; latest recommended article published 2024-02-01 21:51:38.)