博主CUDA学习系列汇总传送门(持续更新):编程语言|CUDA入门
本章节学习内容:
1、cudaDeviceProp 结构体
2、如何查询GPU设备
一、cudaDeviceProp 类
以下结构体定义摘自 CUDA 运行时头文件,中文注释不全之处可参考英文原文
/**
 * CUDA device properties (as declared in the CUDA runtime headers;
 * filled in by cudaGetDeviceProperties()).
 */
struct __device_builtin__ cudaDeviceProp
{
char name[256]; /**< ASCII string identifying the device, e.g. "GTX 1080 Ti" */
cudaUUID_t uuid; /**< 16-byte unique identifier */
char luid[8]; /**< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */
unsigned int luidDeviceNodeMask; /**< LUID device node mask. Value is undefined on TCC and non-Windows platforms */
size_t totalGlobalMem; /**< Global memory available on device in bytes */
size_t sharedMemPerBlock; /**< Shared memory available per block in bytes */
int regsPerBlock; /**< 32-bit registers available per block */
int warpSize; /**< Warp size in threads */
size_t memPitch; /**< Maximum pitch in bytes allowed by memory copies */
int maxThreadsPerBlock; /**< Maximum number of threads per block */
int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
int clockRate; /**< Clock frequency in kilohertz */
size_t totalConstMem; /**< Constant memory available on device in bytes */
int major; /**< Major compute capability */
int minor; /**< Minor compute capability */
size_t textureAlignment; /**< Alignment requirement for textures */
size_t texturePitchAlignment; /**< Pitch alignment requirement for texture references bound to pitched memory */
int deviceOverlap; /**< Boolean: device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
int multiProcessorCount; /**< Number of multiprocessors on device */
int kernelExecTimeoutEnabled; /**< Boolean: whether there is a run time limit on kernels */
int integrated; /**< Boolean: device is integrated as opposed to discrete */
int canMapHostMemory; /**< Boolean: device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
int computeMode; /**< Compute mode: Default, Exclusive, or Prohibited (See ::cudaComputeMode) */
int maxTexture1D; /**< Maximum 1D texture size */
int maxTexture1DMipmap; /**< Maximum 1D mipmapped texture size */
int maxTexture1DLinear; /**< Maximum size for 1D textures bound to linear memory */
int maxTexture2D[2]; /**< Maximum 2D texture dimensions */
int maxTexture2DMipmap[2]; /**< Maximum 2D mipmapped texture dimensions */
int maxTexture2DLinear[3]; /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
int maxTexture2DGather[2]; /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
int maxTexture3D[3]; /**< Maximum 3D texture dimensions */
int maxTexture3DAlt[3]; /**< Maximum alternate 3D texture dimensions */
int maxTextureCubemap; /**< Maximum Cubemap texture dimensions */
int maxTexture1DLayered[2]; /**< Maximum 1D layered texture dimensions */
int maxTexture2DLayered[3]; /**< Maximum 2D layered texture dimensions */
int maxTextureCubemapLayered[2];/**< Maximum Cubemap layered texture dimensions */
int maxSurface1D; /**< Maximum 1D surface size */
int maxSurface2D[2]; /**< Maximum 2D surface dimensions */
int maxSurface3D[3]; /**< Maximum 3D surface dimensions */
int maxSurface1DLayered[2]; /**< Maximum 1D layered surface dimensions */
int maxSurface2DLayered[3]; /**< Maximum 2D layered surface dimensions */
int maxSurfaceCubemap; /**< Maximum Cubemap surface dimensions */
int maxSurfaceCubemapLayered[2];/**< Maximum Cubemap layered surface dimensions */
size_t surfaceAlignment; /**< Alignment requirements for surfaces */
int concurrentKernels; /**< Boolean: device can possibly execute multiple kernels concurrently (within one context) */
int ECCEnabled; /**< Device has ECC support enabled */
int pciBusID; /**< PCI bus ID of the device */
int pciDeviceID; /**< PCI device ID of the device */
int pciDomainID; /**< PCI domain ID of the device */
int tccDriver; /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
int asyncEngineCount; /**< Number of asynchronous engines */
int unifiedAddressing; /**< Device shares a unified address space with the host */
int memoryClockRate; /**< Peak memory clock frequency in kilohertz */
int memoryBusWidth; /**< Global memory bus width in bits */
int l2CacheSize; /**< Size of L2 cache in bytes */
int maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
int streamPrioritiesSupported; /**< Device supports stream priorities */
int globalL1CacheSupported; /**< Device supports caching globals in L1 */
int localL1CacheSupported; /**< Device supports caching locals in L1 */
size_t sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
int regsPerMultiprocessor; /**< 32-bit registers available per multiprocessor */
int managedMemory; /**< Device supports allocating managed memory on this system */
int isMultiGpuBoard; /**< Device is on a multi-GPU board */
int multiGpuBoardGroupID; /**< Unique identifier for a group of devices on the same multi-GPU board */
int hostNativeAtomicSupported; /**< Link between the device and the host supports native atomic operations */
int singleToDoublePrecisionPerfRatio; /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
int pageableMemoryAccess; /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
int concurrentManagedAccess; /**< Device can coherently access managed memory concurrently with the CPU */
int computePreemptionSupported; /**< Device supports Compute Preemption */
int canUseHostPointerForRegisteredMem; /**< Device can access host registered memory at the same virtual address as the CPU */
int cooperativeLaunch; /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
int cooperativeMultiDeviceLaunch; /**< Device can participate in cooperative kernels launched via ::cudaLaunchCooperativeKernelMultiDevice */
size_t sharedMemPerBlockOptin; /**< Per device maximum shared memory per block usable by special opt in */
int pageableMemoryAccessUsesHostPageTables; /**< Device accesses pageable memory via the host's page tables */
int directManagedMemAccessFromHost; /**< Host can directly access managed memory on the device without migration. */
};
二、Demo获取显卡名称
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
/**
 * Demo: enumerate the CUDA devices on this machine and print each one's name.
 *
 * Returns 0 on success, 1 if any CUDA runtime call fails. Error messages
 * include cudaGetErrorString() so the failure cause is visible, not just
 * the fact that the call failed.
 */
int main() {
    // Query how many CUDA-capable devices are visible to the runtime.
    int deviceCount = 0;
    cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n",
                cudaGetErrorString(cudaStatus));
        return 1;
    }
    std::cout << "the number of gpu: " << deviceCount << std::endl;

    for (int i = 0; i < deviceCount; ++i)
    {
        // Declared inside the loop so each iteration starts from a freshly
        // populated struct rather than reusing a stale one.
        cudaDeviceProp cuInfo;
        cudaStatus = cudaGetDeviceProperties(&cuInfo, i); // fill in device i's properties
        if (cudaStatus != cudaSuccess)
        {
            fprintf(stderr, "cudaGetDeviceProperties failed for device %d: %s\n",
                    i, cudaGetErrorString(cudaStatus));
            return 1;
        }
        printf("Name: %s\n", cuInfo.name);
    }
    std::cout << "Hello, World!" << std::endl;
    return 0;
}