首先来看一个示例程序(sample):
test_cuda.cpp
// test_cuda_consle_2.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include "stdio.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
int main()
{
int deviceCount = 0;
cudaGetDeviceCount(&deviceCount);//runtime API中的函数以cuda为前缀,driver API中的函数则以cu为前缀
if (deviceCount == 0)
{
printf("There is no device suppporting CUDA\n");
}
int dev = 0;
for (; dev < deviceCount; dev++)
{
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp,dev);
if (dev == 0)
{
if (deviceProp.major == 9999 && deviceProp.minor == 9999)//deviceProp.(major,minor)分别是设备计算能力的主版本号和次版本号
printf("There is no device supporting CUDA.\n");
else if (deviceCount == 1)
printf("There is 1 device supporting CUDA.\n");
else
printf("there are %d devices supporting CUDA.\n",deviceCount);
}
printf("\n Device %d:\"%s\"\n",dev,deviceProp.name);
printf("Major revision number: %d\n",deviceProp.major);
printf("Minor revision number: %d\n",deviceProp.minor);
printf("Total amount of global memory: %u bytes\n",deviceProp.totalGlobalMem);
#if CUDART_VERSION>=2000
printf("Number of multiprocessors:%d\n",deviceProp.multiProcessorCount);
printf("Number of cores:%d\n",8*deviceProp.multiProcessorCount);
#endif
printf("Total amount of constant memory:%u bytes\n",deviceProp.totalConstMem);
printf("Total amount of shared memory per block: %u bytes\n",deviceProp.sharedMemPerBlock);
printf("Total number of registers available per block:%d\n",deviceProp.regsPerBlock);
printf("Warp size:%d\n",deviceProp.warpSize);
printf("Maximum number of threads per block: %d\n",deviceProp.maxThreadsPerBlock);
printf("Maximum sizes of each dimension of a block:%d x %d x %d \n",
dev