VS中使用CUDA命令打印GPU硬件信息

最新推荐文章于 2023-08-27 22:08:18 发布

Awesome Baron

最新推荐文章于 2023-08-27 22:08:18 发布

阅读量413

点赞数

分类专栏：CUDA并行计算 | 文章标签：c语言

本文链接：https://blog.csdn.net/m0_46464899/article/details/130710952

版权

CUDA并行计算专栏收录该内容

6 篇文章 2 订阅

订阅专栏

记录一下如何在VS2017中调用CUDA运行时接口，打印GPU的硬件信息。

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

/**
 * @brief Print a human-readable summary of one CUDA device's properties to stdout.
 *
 * Every line is written with printf; nothing is returned and @p prop is not
 * modified.
 *
 * @param prop Device properties, as filled in by cudaGetDeviceProperties().
 */
void showDeviceProp(cudaDeviceProp &prop) {
    // Theoretical peak bandwidth in GB/s, following the formula used in NVIDIA's
    // "How to Query Device Properties" article: memoryClockRate is in kHz,
    // memoryBusWidth is in bits, and the factor 2 accounts for double data rate.
    const double peakBandwidthGBs =
        2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8.0) / 1.0e6;

    printf("Device name: %s\n", prop.name);
    printf("  Compute capability: %d.%d\n", prop.major, prop.minor);
    printf("  Clock rate: %d\n", prop.clockRate);
    printf("  Memory clock rate: %d\n", prop.memoryClockRate);
    printf("  Memory bus width: %d\n", prop.memoryBusWidth);
    printf("  Peak memory bandwidth: %.1f GB/s\n", peakBandwidthGBs);
    // The size_t fields below use %zu: %lu is only 32 bits wide on 64-bit
    // Windows (MSVC), which mismatches size_t and truncates large values.
    printf("  Total global memory: %zu\n", prop.totalGlobalMem);
    printf("  Total shared memory per block: %zu\n", prop.sharedMemPerBlock);
    printf("  Total registers per block: %d\n", prop.regsPerBlock);
    printf("  Warp size: %d\n", prop.warpSize);
    printf("  Maximum memory pitch: %zu\n", prop.memPitch);
    printf("  Maximum threads per block: %d\n", prop.maxThreadsPerBlock);
    printf("  Maximum dimension of block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
    printf("  Maximum dimension of grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
    // totalConstMem is the amount of constant memory, so label it as such.
    printf("  Total constant memory: %zu\n", prop.totalConstMem);
    printf("  Texture alignment: %zu\n", prop.textureAlignment);
    // NOTE(review): the CUDA runtime docs mark deviceOverlap as deprecated in
    // favor of asyncEngineCount; kept here to preserve the original output.
    printf("  Concurrent copy and execution: %s\n", prop.deviceOverlap ? "Yes" : "No");
    printf("  Number of multiprocessors: %d\n", prop.multiProcessorCount);
    printf("  Kernel execution timeout: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
    printf("  Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
}

/**
 * @brief Enumerate all CUDA devices and print each one's properties.
 *
 * @return 0 when every query succeeds; EXIT_FAILURE if the device count or
 *         any device's properties could not be obtained.
 */
int main() {
    // Initialized so a failed query can never leave an indeterminate loop bound.
    int num_devices = 0;

    cudaError_t status = cudaGetDeviceCount(&num_devices);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(status));
        return EXIT_FAILURE;
    }
    printf("%d CUDA devices found\n", num_devices);

    int exit_code = 0;
    for (int i = 0; i < num_devices; i++) {
        cudaDeviceProp properties;
        status = cudaGetDeviceProperties(&properties, i);
        if (status != cudaSuccess) {
            // Skip this device rather than printing an unfilled struct, but
            // keep reporting the remaining devices.
            fprintf(stderr, "cudaGetDeviceProperties failed for device %d: %s\n",
                    i, cudaGetErrorString(status));
            exit_code = EXIT_FAILURE;
            continue;
        }
        printf("Device %d: \"%s\"\n", i, properties.name);
        showDeviceProp(properties);
    }

    return exit_code;
}

打印如下：

1 CUDA devices found
Device 0: "NVIDIA GeForce RTX 3080"
Device name: NVIDIA GeForce RTX 3080
  Compute capability: 8.6
  Clock rate: 1710000
  Memory clock rate: 9501000
  Memory bus width: 384
  Peak memory bandwidth: 384
  Total global memory: 4294443008
  Total shared memory per block: 49152
  Total registers per block: 65536
  Warp size: 32
  Maximum memory pitch: 2147483647
  Maximum threads per block: 1024
  Maximum dimension of block: 1024 x 1024 x 64
  Maximum dimension of grid: 2147483647 x 65535 x 65535
  Maximum memory alloc size: 65536
  Texture alignment: 512
  Concurrent copy and execution: Yes
  Number of multiprocessors: 70
  Kernel execution timeout: Yes
  Integrated GPU sharing Host Memory: No