// Time a region of GPU work with CUDA events.
// NOTE(review): CHECK is presumably the project's cudaError_t-checking macro; it is
// not defined in this snippet.
cudaEvent_t start, stop;
CHECK(cudaEventCreate(&start));  // Create the CUDA event objects.
CHECK(cudaEventCreate(&stop));
CHECK(cudaEventRecord(start));   // Record the event that marks the beginning.

// Force a flush of the CUDA execution queue. Deliberately NOT wrapped in CHECK:
// cudaEventQuery returns cudaErrorNotReady while the event is still pending,
// which is an expected status here rather than a failure.
cudaEventQuery(start);

// run code.

CHECK(cudaEventRecord(stop));
CHECK(cudaEventSynchronize(stop));  // Block the host until the stop event has completed.

float elapsed_time = 0;  // Subtracted from the measurement below; zero in this snippet.
float curr_time = 0;     // Receives the start-to-stop interval.
CHECK(cudaEventElapsedTime(&curr_time, start, stop));  // Time between start and stop, in ms.
printf("host memory malloc and copy: %f ms.\n", curr_time - elapsed_time);

// Release the events once no further measurements are taken with them.
CHECK(cudaEventDestroy(start));
CHECK(cudaEventDestroy(stop));
注意事项
nvprof
nvprof 不支持计算能力 8.0 及以上的设备，需改用 NVIDIA Nsight Systems 或 NVIDIA Nsight Compute：
$ nvprof ./add
======== Warning: nvprof is not supported on devices with compute capability 8.0 and higher.
Use NVIDIA Nsight Systems for GPU tracing and CPU sampling and NVIDIA Nsight Compute for GPU profiling.
Refer https://developer.nvidia.com/tools-overview for more details.