#include<memory>#include<iostream>#include<cuda_runtime.h>#include<stdio.h>// Main Program intmain(void){//计算有几个CUDA设备int device_Count =0;cudaGetDeviceCount(&device_Count);// This function returns count of number of CUDA enable devices and 0 if there are no CUDA capable devices.if(device_Count ==0){printf("There are no available device(s) that support CUDA\n");}else{printf("Detected %d CUDA Capable device(s)\n", device_Count);}}
// Memory-related properties of the device described by device_Property.
printf("\n\n\n");
// totalGlobalMem is printed twice: scaled to MBytes (divided by 1048576)
// and as the raw byte count.
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n",
       (float)device_Property.totalGlobalMem / 1048576.0f,
       (unsigned long long)device_Property.totalGlobalMem);
// The clock rate is scaled by 1e-3 so that it can be shown in MHz.
printf(" Memory Clock rate: %.0f MHz \n",
       device_Property.memoryClockRate * 1e-3f);
printf(" Memory Bus Width: %d-bit\n", device_Property.memoryBusWidth);
// Only report the L2 cache when its size is non-zero.
if (device_Property.l2CacheSize) {
    printf(" L2 Cache Size: %d bytes\n", device_Property.l2CacheSize);
}
// totalConstMem and sharedMemPerBlock are size_t, so they need %zu;
// %lu is the wrong width on platforms where long is 32-bit.
printf(" Total amount of constant memory: %zu bytes\n",
       device_Property.totalConstMem);
printf(" Total amount of shared memory per block: %zu bytes\n",
       device_Property.sharedMemPerBlock);
// regsPerBlock is an int holding a register count (not a byte size),
// so it is printed with %d and labelled as a number of registers.
printf(" Total number of registers available per block: %d\n",
       device_Property.regsPerBlock);
// Thread-related properties of the device described by device_Property.
printf("\n\n\n");
printf("Maximum number of threads per multiprocessor: %d \n",
       device_Property.maxThreadsPerMultiProcessor);
// The label describes the per-block thread limit, so the matching field is
// maxThreadsPerBlock (maxBlocksPerMultiProcessor is a different quantity).
printf("Maximum number of threads per block:%d \n",
       device_Property.maxThreadsPerBlock);
// Per-dimension upper bounds for the block shape.
printf("Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
       device_Property.maxThreadsDim[0],
       device_Property.maxThreadsDim[1],
       device_Property.maxThreadsDim[2]);
// Per-dimension upper bounds for the grid shape.
printf("Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
       device_Property.maxGridSize[0],
       device_Property.maxGridSize[1],
       device_Property.maxGridSize[2]);
4. 了解设备属性的意义
为什么要做这些?
如果有多个GPU设备,它将帮助你选择具有更多多处理器的GPU设备
这些属性还将帮助你查找设备上可用的块的数量和每个块的线程数量,这将帮助你配置内核参数
下面展示一个需要执行双精度浮点运算的程序。运行这类程序之前,需要先判断设备是否支持双精度浮点运算,如下:
//查看支持浮点操作printf("\n\n\n");printf("ID of device: %d\n", device);memset(&device_Property,0,sizeof(cudaDeviceProp));// 识别设备是否支持双精度操作的两个属性[major>1 && minor >3]
device_Property.major =1;
device_Property.minor =3;cudaChooseDevice(&device,&device_Property);printf("ID of device which supports double precision is: %d\n", device);cudaSetDevice(device);