Stream Tips:
1. for() cudaStreamCreate()
2. cudaMallocHost()
3. for() cudaMemcpyAsync(,,,,stream[i]);
for() kernel<<<,,,stream[i]>>>()
for() cudaMemcpyAsync(,,,,stream[i]);
cudaDeviceSynchronize(); (replaces the deprecated cudaThreadSynchronize())
4. for() cudaStreamDestroy();
Figure 1. Stream && event demo
Event Tips:
1. cudaEventCreate(&start);
cudaEventCreate(&stop);
2. cudaEventRecord(start,0);
for() cudaMemcpyAsync(,,,,stream[i]);
for() kernel<<<,,,stream[i]>>>()
for() cudaMemcpyAsync(,,,,stream[i]);
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start, stop);
3. cudaEventDestroy(start);
cudaEventDestroy(stop);
Examples:
1. cudaEventCreate(&start);
cudaEventCreate(&stop);
2. cutCreateTimer(&timer);
cutResetTimer(timer);
cudaDeviceSynchronize(); (replaces the deprecated cudaThreadSynchronize())
3. cutStartTimer(timer);
cudaEventRecord(start,0);
cudaMemcpyAsync();
kernel<<<>>>();
cudaMemcpyAsync();
cudaEventRecord(stop,0);
cutStopTimer(timer);
4. Once cudaEventQuery(stop) == cudaSuccess, call cudaEventElapsedTime(&gpu_time, start, stop);
5. gpu_time = 29.39 (time the GPU spent executing)
cutGetTimerValue(timer) = 0.05 (time the CPU spent in CUDA calls)
For more about streams and events, please visit: http://www.hpctech.com/