cuda 计算kernel执行时间:
#include <iostream>
using namespace std;
// Intentionally empty kernel: takes no arguments and performs no work.
// A device-side trace can be enabled for debugging by restoring:
//     printf("kkeerrnneell\n");
__global__ void kernel() {}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Times a batch of back-to-back launches of `kernel` on the default stream
// using a pair of CUDA events, and prints the elapsed time (milliseconds, as
// reported by cudaEventElapsedTime) to stdout.
//
// Returns 0 on success, 1 if any CUDA runtime call or kernel launch failed
// (the failure is described on stderr and no time is printed).
int main()
{
    const int launchCount = 1000000;  // number of kernel launches being timed

    // Zero-initialised so cleanup can tell which events were actually created.
    cudaEvent_t start = 0;
    cudaEvent_t stop = 0;
    float elapsedMs = 0.0f;

    // Each step runs only if every previous step succeeded (short-circuit &&).
    bool ok = cudaCallSucceeded(cudaEventCreate(&start), "cudaEventCreate(start)");
    ok = ok && cudaCallSucceeded(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    // Record the start event on the default stream.
    ok = ok && cudaCallSucceeded(cudaEventRecord(start), "cudaEventRecord(start)");

    if (ok) {
        // Execute the kernel repeatedly; launches are asynchronous.
        for (int i = 0; i < launchCount; i++)
            kernel<<<1, 1>>>();
    }

    // Record the stop event on the default stream, right after the launches.
    ok = ok && cudaCallSucceeded(cudaEventRecord(stop), "cudaEventRecord(stop)");

    // Kernel launches do not return a status; launch failures are only
    // visible through cudaGetLastError(). Checked after the stop event is
    // recorded so the check itself is not part of the timed region.
    ok = ok && cudaCallSucceeded(cudaGetLastError(), "kernel launch");

    // Wait until the stop event (and thus all preceding work) completes.
    ok = ok && cudaCallSucceeded(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");

    // Compute the elapsed time between the two events.
    ok = ok && cudaCallSucceeded(cudaEventElapsedTime(&elapsedMs, start, stop),
                                 "cudaEventElapsedTime");

    if (ok)
        std::cout << "Time is " << elapsedMs << std::endl;

    // Clean up whichever events were created, even on the failure path.
    if (start != 0)
        cudaEventDestroy(start);
    if (stop != 0)
        cudaEventDestroy(stop);

    return ok ? 0 : 1;
}