博主CUDA学习系列汇总传送门(持续更新):编程语言|CUDA入门
本章节学习内容:
1、cudaDeviceProp 结构体
2、如何查询GPU设备
一、cudaDeviceProp 类
以下结构体定义摘自 CUDA 运行时头文件,中文注释不全之处可参考英文原文
/**
 * CUDA device properties (as declared in the CUDA runtime headers;
 * filled in by cudaGetDeviceProperties()).
 */
struct __device_builtin__ cudaDeviceProp
{
char name[256]; /**< ASCII string identifying the device, e.g. "GTX 1080 Ti" */
cudaUUID_t uuid; /**< 16-byte unique identifier */
char luid[8]; /**< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */
unsigned int luidDeviceNodeMask; /**< LUID device node mask. Value is undefined on TCC and non-Windows platforms */
size_t totalGlobalMem; /**< Global memory available on device in bytes */
size_t sharedMemPerBlock; /**< Shared memory available per block in bytes */
int regsPerBlock; /**< 32-bit registers available per block */
int warpSize; /**< Warp size in threads */
size_t memPitch; /**< Maximum pitch in bytes allowed by memory copies */
int maxThreadsPerBlock; /**< Maximum number of threads per block */
int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
int clockRate; /**< Clock frequency in kilohertz */
size_t totalConstMem; /**< Constant memory available on device in bytes */
int major; /**< Major compute capability */
int minor; /**< Minor compute capability */
size_t textureAlignment; /**< Alignment requirement for textures */
size_t texturePitchAlignment; /**< Pitch alignment requirement for texture references bound to pitched memory */
int deviceOverlap; /**< Boolean: device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
int multiProcessorCount; /**< Number of multiprocessors on device */
int kernelExecTimeoutEnabled; /**< Boolean: whether there is a run time limit on kernels */
int integrated; /**< Boolean: device is integrated as opposed to discrete */
int canMapHostMemory; /**< Boolean: device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
int computeMode; /**< Compute mode: Default, Exclusive, or Prohibited (See ::cudaComputeMode) */
int maxTexture1D; /**< Maximum 1D texture size */
int maxTexture1DMipmap; /**< Maximum 1D mipmapped texture size */
int maxTexture1DLinear; /**< Maximum size for 1D textures bound to linear memory */
int maxTexture2D[2]; /**< Maximum 2D texture dimensions */
int maxTexture2DMipmap[2]; /**< Maximum 2D mipmapped texture dimensions */
int maxTexture2DLinear[3]; /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
int maxTexture2DGather[2]; /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
int maxTexture3D[3]; /**< Maximum 3D texture dimensions */
int maxTexture3DAlt[3]; /**< Maximum alternate 3D texture dimensions */
int maxTextureCubemap; /**< Maximum Cubemap texture dimensions */
int maxTexture1DLayered[2]; /**< Maximum 1D layered texture dimensions */
int maxTexture2DLayered[3]; /**< Maximum 2D layered texture dimensions */
int maxTextureCubemapLayered[2];/**< Maximum Cubemap layered texture dimensions */
int maxSurface1D; /**< Maximum 1D surface size */
int maxSurface2D[2]; /**< Maximum 2D surface dimensions */
int maxSurface3D[3]; /**< Maximum 3D surface dimensions */
int maxSurface1DLayered[2]; /**< Maximum 1D layered surface dimensions */
int maxSurface2DLayered[3]; /**< Maximum 2D layered surface dimensions */
int maxSurfaceCubemap; /**< Maximum Cubemap surface dimensions */
int maxSurfaceCubemapLayered[2];/**< Maximum Cubemap layered surface dimensions */
size_t surfaceAlignment; /**< Alignment requirements for surfaces */
int concurrentKernels; /**< Boolean: device can possibly execute multiple kernels concurrently (within one context) */
int ECCEnabled; /**< Device has ECC support enabled */
int pciBusID; /**< PCI bus ID of the device */
int pciDeviceID; /**< PCI device ID of the device */
int pciDomainID; /**< PCI domain ID of the device */
int tccDriver; /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
int asyncEngineCount; /**< Number of asynchronous engines */
int unifiedAddressing; /**< Device shares a unified address space with the host */
int memoryClockRate; /**< Peak memory clock frequency in kilohertz */
int memoryBusWidth; /**< Global memory bus width in bits */
int l2CacheSize; /**< Size of L2 cache in bytes */
int maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
int streamPrioritiesSupported; /**< Device supports stream priorities */
int globalL1CacheSupported; /**< Device supports caching globals in L1 */
int localL1CacheSupported; /**< Device supports caching locals in L1 */
size_t sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
int regsPerMultiprocessor; /**< 32-bit registers available per multiprocessor */
int managedMemory; /**< Device supports allocating managed memory on this system */
int isMultiGpuBoard; /**< Device is on a multi-GPU board */
int multiGpuBoardGroupID; /**< Unique identifier for a group of devices on the same multi-GPU board */
int hostNativeAtomicSupported; /**< Link between the device and the host supports native atomic operations */
int singleToDoublePrecisionPerfRatio; /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
int pageableMemoryAccess; /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
int concurrentManagedAccess; /**< Device can coherently access managed memory concurrently with the CPU */
int computePreemptionSupported; /**< Device supports Compute Preemption */
int canUseHostPointerForRegisteredMem; /**< Device can access host registered memory at the same virtual address as the CPU */
int cooperativeLaunch; /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
int cooperativeMultiDeviceLaunch; /**< Device can participate in cooperative kernels launched via ::cudaLaunchCooperativeKernelMultiDevice */
size_t sharedMemPerBlockOptin; /**< Per device maximum shared memory per block usable by special opt in */
int pageableMemoryAccessUsesHostPageTables; /**< Device accesses pageable memory via the host's page tables */
int directManagedMemAccessFromHost; /**< Host can directly access managed memory on the device without migration. */
};
二、Demo获取显卡名称
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
/**
 * Demo: enumerate the CUDA devices on this machine and print each one's name.
 *
 * Returns 0 on success, 1 if any CUDA runtime call fails. Error messages
 * include cudaGetErrorString() so the failure cause is visible, not just
 * the fact that the call failed.
 */
int main() {
    // Query how many CUDA-capable devices are visible to the runtime.
    int deviceCount = 0;
    cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n",
                cudaGetErrorString(cudaStatus));
        return 1;
    }
    std::cout << "the number of gpu: " << deviceCount << std::endl;

    for (int i = 0; i < deviceCount; ++i)
    {
        // Declared inside the loop so each iteration starts from a freshly
        // populated struct rather than reusing a stale one.
        cudaDeviceProp cuInfo;
        cudaStatus = cudaGetDeviceProperties(&cuInfo, i); // fill in device i's properties
        if (cudaStatus != cudaSuccess)
        {
            fprintf(stderr, "cudaGetDeviceProperties failed for device %d: %s\n",
                    i, cudaGetErrorString(cudaStatus));
            return 1;
        }
        printf("Name: %s\n", cuInfo.name);
    }
    std::cout << "Hello, World!" << std::endl;
    return 0;
}