编写 CUDA 程序时，不能用 CPU 端的计时方法来测量 GPU 程序的运行时间（kernel 启动相对于主机是异步的，CPU 计时器测到的往往只是启动开销），应改用 CUDA 事件计时。计时用的宏记录如下：
/*
 * START_GPU / END_GPU -- paired macros that time the code placed between
 * them with CUDA events and print the elapsed time in seconds.
 *
 * Usage:
 *     START_GPU
 *     myKernel<<<grid, block>>>(args);
 *     END_GPU
 *
 * Notes:
 *  - START_GPU opens a brace scope that END_GPU closes, so the two must
 *    always be used together, in this order, inside the same enclosing
 *    block. The scope declares locals named `start`, `stop` and
 *    `elapsedTime`; same-named outer variables are shadowed inside the
 *    timed region.
 *  - Both events are recorded on stream 0. NOTE(review): work submitted to
 *    other streams may not be covered by the measurement -- confirm against
 *    the stream semantics the caller relies on.
 *  - END_GPU waits on the `stop` event before reading the elapsed time.
 *    cudaEventElapsedTime reports milliseconds, hence the division by 1000
 *    before printing seconds.
 *  - Requires `checkCudaErrors` (not defined here) and `printf` to be in
 *    scope at the point of use.
 *
 * The last line of START_GPU must NOT end with a backslash: a trailing
 * line continuation would splice the following `#define END_GPU` line into
 * START_GPU's replacement text and leave END_GPU undefined.
 */
#define START_GPU {                                                     \
    cudaEvent_t start, stop;                                            \
    float elapsedTime;                                                  \
    checkCudaErrors(cudaEventCreate(&start));                           \
    checkCudaErrors(cudaEventCreate(&stop));                            \
    checkCudaErrors(cudaEventRecord(start, 0));

#define END_GPU                                                         \
    checkCudaErrors(cudaEventRecord(stop, 0));                          \
    checkCudaErrors(cudaEventSynchronize(stop));                        \
    checkCudaErrors(cudaEventElapsedTime(&elapsedTime, start, stop));   \
    printf("end checking: %2.5f seconds\n", elapsedTime / 1000);        \
    checkCudaErrors(cudaEventDestroy(start));                           \
    checkCudaErrors(cudaEventDestroy(stop));                            \
}