- C++
#include "driver_types.h"
float time_elapsed = 0;
cudaEvent_t start, stop;
cudaEventCreate(&start); // 创建Event
cudaEventCreate(&stop);
cudaEventRecord(start, 0); // 记录当前时间
m_Context->enqueue(m_BatchcSize, m_Bindings.data(), m_CudaStream, nullptr);
cudaEventRecord(stop, 0); // 记录当前时间
cudaEventSynchronize(start); // Watis for an event to complate
cudaEventSynchronize(stop); // Waits for an event to complate.Record 之前的任务
cudaEventElapsedTime(&time_elapsed, start, stop); // 计算时间差
cudaEventDestroy(start); // destory the event
cudaEventDestroy(stop);
std::cout << "time one map for a batch = " << time_elapsed << std::endl;
# Time m.run() on the GPU with a pair of CuPy events.
# record() is called without a stream argument on both events.
start, end = cupy.cuda.Event(), cupy.cuda.Event()

start.record()  # marker placed before the work is launched
m.run()
end.record()    # marker placed after the work is launched

# Wait on the host until both markers have been reached on the device;
# the elapsed time can only be queried once both events have completed.
start.synchronize()
end.synchronize()

# Time between the two events (milliseconds, per the CuPy documentation).
time_elapsed = cupy.cuda.get_elapsed_time(start, end)
print("running time: ", time_elapsed)