查看NVIDIA GPU设备节点映射关系

一.参考链接

二.小结

  • NVIDIA_VISIBLE_DEVICES是nvidia-container的环境变量
  • CUDA_VISIBLE_DEVICES 是CUDA SDK的环境变量
  • 在宿主机上,CUDA_VISIBLE_DEVICES 不会影响 NVML 和 nvidia-smi 的行为,但会影响 CUDA API(例如 cudaGetDeviceCount)的行为
  • 在 nvidia-container 容器里,CUDA_VISIBLE_DEVICES 只能是容器内逻辑设备列表的子集;CUDA 会按 CUDA_VISIBLE_DEVICES 中列出的设备顺序进行枚举,并据此重新生成逻辑设备 ID

三.生成测试程序

tee cuda_api_sample.cpp<<-'EOF'
#include <iostream>
#include <cuda_runtime.h>
#include <iostream>
#include <vector>
#include <stdio.h>
#include <assert.h>
#include <cstdio>
#include <cuda.h>

// CHECK_CUDA(call): evaluates a CUDA runtime API expression exactly once.
// If the result is not cudaSuccess, prints the source file, line, numeric
// error code and cudaGetErrorString() text to stderr, then terminates the
// process with EXIT_FAILURE.
// The argument is parenthesized so any expression expands safely, and the
// local is named err_ so it is unlikely to shadow or be captured by an
// identifier used inside the caller's expression.
#define CHECK_CUDA(call)                                           \
    do {                                                           \
        cudaError_t err_ = (call);                                 \
        if (err_ != cudaSuccess) {                                 \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__; \
            std::cerr << " code=" << err_ << " (" << cudaGetErrorString(err_) << ")" << std::endl; \
            exit(EXIT_FAILURE);                                    \
        }                                                          \
    } while (0)

// Enumerates the devices visible to the CUDA runtime and prints, for each
// runtime device index, the device name and its 16-byte UUID rendered as
// 32 lowercase hex digits.
// Any CUDA runtime error terminates the process through CHECK_CUDA;
// otherwise returns 0.
int main(int argc,char *argv[])
{
    int deviceCount = 0;
    CHECK_CUDA(cudaGetDeviceCount(&deviceCount));
    printf("deviceCount:%d\n",deviceCount);
    for(int i=0;i<deviceCount;i++)
    {
        int deviceid=i;
        CHECK_CUDA(cudaSetDevice(deviceid));
        cudaDeviceProp prop;
        CHECK_CUDA(cudaGetDeviceProperties(&prop, deviceid));
        // 16 bytes -> 32 hex characters plus the terminating NUL.
        char uuid_str[33];
        for (int j = 0; j < 16; ++j) {
            // The CUDA headers declare uuid.bytes as plain char. Without the
            // cast, a byte >= 0x80 is sign-extended when promoted to int and
            // "%02x" prints 8 digits ("ffffffXX"), corrupting the UUID and
            // writing past the end of uuid_str. snprintf additionally bounds
            // every write to the space that is left in the buffer.
            snprintf(&uuid_str[j * 2], sizeof(uuid_str) - j * 2, "%02x",
                     static_cast<unsigned char>(prop.uuid.bytes[j]));
        }
        uuid_str[32] = '\0';
        std::cout << "Device:" <<deviceid<<  " "  <<prop.name <<"   UUID:" << uuid_str << std::endl;
    }
    return 0;
}
EOF
# Build the CUDA runtime sample with g++, using the headers and libraries
# under /usr/local/cuda and linking against libcuda and libcudart.
g++  -o cuda_api_sample cuda_api_sample.cpp  -I /usr/local/cuda/include -L /usr/local/cuda/lib64 -lcuda -lcudart

tee nvml_sample.cpp<<-'EOF'
#include <stdio.h>
#include <nvml.h>

int main() {
    nvmlReturn_t result;
    unsigned int device_count, i;
    nvmlDevice_t device;
    char uuid[NVML_DEVICE_UUID_BUFFER_SIZE];
    // Initialize NVML
    result = nvmlInit();
    if (NVML_SUCCESS != result) {
        printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
        return 1;
    }
    // Get the number of devices
    result = nvmlDeviceGetCount(&device_count);
    if (NVML_SUCCESS != result) {
        printf("Failed to get device count: %s\n", nvmlErrorString(result));
        nvmlShutdown();
        return 1;
    }
    printf("Found %u devices\n", device_count);
    // Iterate through devices and get UUID
    for (i = 0; i < device_count; i++) {
        result = nvmlDeviceGetHandleByIndex(i, &device);
        if (NVML_SUCCESS != result) {
            printf("Failed to get handle for device %u: %s\n", i, nvmlErrorString(result));
            continue;
        }
        result = nvmlDeviceGetUUID(device, uuid, sizeof(uuid));
        if (NVML_SUCCESS != result) {
            printf("Failed to get UUID for device %u: %s\n", i, nvmlErrorString(result));
        } else {
            printf("Device %u UUID: %s\n", i, uuid);
        }
    }
    // Shutdown NVML
    nvmlShutdown();
    return 0;
}
EOF
# Build the NVML sample. It calls only NVML functions, so it links against
# libnvidia-ml alone; linking libcuda/libcudart as well would add runtime
# dependencies the program never uses.
g++  -o nvml_sample nvml_sample.cpp  -I /usr/local/cuda/include -L /usr/local/cuda/lib64 -lnvidia-ml
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Hi20240217

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值