CUDA编程3(数组数乘,带宽计算,事件计时)
说明:本打算用此程序计算两张显卡之间数据传输的带宽,但是本机只有一张显卡,它依然算出来了一个带宽值,望请指点。本例同时演示了事件计时函数的基本用法。
// Multiplies every element of `input` by `scalar` and stores the result in
// `output`:  output[i] = input[i] * scalar  for 0 <= i < size.
//
// Launch layout: 1D grid of 1D blocks. The loop below strides by the total
// number of launched threads, so any grid/block size produces a complete
// result (including a grid that is smaller than `size`). No shared memory is
// used.
//
// Parameters:
//   input  - device pointer to at least `size` floats (read).
//   scalar - multiplier applied to each element.
//   output - device pointer to at least `size` floats (written).
//   size   - number of elements to process; values <= 0 process nothing.
__global__ void sclarMultiply(float *input, float scalar, float* output, int size)
{
    // Work in size_t so the index arithmetic cannot wrap in 32 bits and a
    // negative `size` is treated as an empty range.
    const size_t count = size > 0 ? static_cast<size_t>(size) : 0;
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count;
         i += stride)
    {
        output[i] = input[i] * scalar;
    }
}
// Reports a failed CUDA runtime call on std::cerr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool scalarMultiplyCheck(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo driver: fills a 1000-element host array with 0..999, multiplies it by
// 2.0f on the GPU with sclarMultiply, and prints the elapsed time followed by
// the results.
//
// The interval measured with CUDA events spans the host-to-device copy, the
// kernel, and the device-to-host copy, i.e. it times host<->device traffic on
// a single device plus the kernel itself.
//
// Every CUDA runtime call is checked; on the first failure a message is
// written to std::cerr, the remaining GPU steps and the result printout are
// skipped, and all resources that were acquired are still released.
void scalarMultiply()
{
    const int size = 1000;
    const float scalar = 2.0f;
    const size_t bytes = size * sizeof(float);
    const int block_size = 256;
    const int numBlocks = (size + block_size - 1) / block_size; // ceil-div

    float* h_input = new float[size];
    float* h_output = new float[size];
    for (int i = 0; i < size; i++)
    {
        h_input[i] = static_cast<float>(i);
    }

    // Initialised to null so the cleanup section can tell what was acquired.
    float* d_input = nullptr;
    float* d_output = nullptr;
    cudaEvent_t start = nullptr;
    cudaEvent_t stop = nullptr;
    float millseconds = 0;

    // Short-circuit evaluation stops at the first failing call.
    bool ok = scalarMultiplyCheck(cudaMalloc(&d_input, bytes), "cudaMalloc(d_input)")
           && scalarMultiplyCheck(cudaMalloc(&d_output, bytes), "cudaMalloc(d_output)")
           && scalarMultiplyCheck(cudaEventCreate(&start), "cudaEventCreate(start)")
           && scalarMultiplyCheck(cudaEventCreate(&stop), "cudaEventCreate(stop)")
           && scalarMultiplyCheck(cudaEventRecord(start), "cudaEventRecord(start)")
           && scalarMultiplyCheck(cudaMemcpy(d_input, h_input, bytes, cudaMemcpyHostToDevice),
                                  "cudaMemcpy(host to device)");

    if (ok)
    {
        sclarMultiply<<<numBlocks, block_size>>>(d_input, scalar, d_output, size);
        // A kernel launch returns nothing; launch errors must be fetched explicitly.
        ok = scalarMultiplyCheck(cudaGetLastError(), "sclarMultiply launch")
          && scalarMultiplyCheck(cudaMemcpy(h_output, d_output, bytes, cudaMemcpyDeviceToHost),
                                 "cudaMemcpy(device to host)")
          && scalarMultiplyCheck(cudaEventRecord(stop), "cudaEventRecord(stop)")
          && scalarMultiplyCheck(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
          && scalarMultiplyCheck(cudaEventElapsedTime(&millseconds, start, stop),
                                 "cudaEventElapsedTime");
    }

    if (ok)
    {
        std::cout << "用时:" << millseconds<< " 毫秒" << std::endl;
        std::cout << " scalar multiply data: " << std::endl;
        for (int i = 0; i < size; i++)
        {
            std::cout << h_output[i] << " ";
        }
        std::cout << std::endl;
    }

    // Cleanup runs on both the success and the failure path.
    if (start != nullptr)
    {
        scalarMultiplyCheck(cudaEventDestroy(start), "cudaEventDestroy(start)");
    }
    if (stop != nullptr)
    {
        scalarMultiplyCheck(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    }
    if (d_input != nullptr)
    {
        scalarMultiplyCheck(cudaFree(d_input), "cudaFree(d_input)");
    }
    if (d_output != nullptr)
    {
        scalarMultiplyCheck(cudaFree(d_output), "cudaFree(d_output)");
    }
    delete[] h_input;
    delete[] h_output;
}