在qtcreator中cuda驱动初始化和使用、上下文管理设置，使用驱动API进行内存分配

Good@dz

已于 2022-04-03 22:24:25 修改

阅读量1.1k

点赞数

分类专栏： tensorrt 文章标签：人工智能机器学习 tensorrt

于 2022-04-03 08:36:31 首次发布

本文链接：https://blog.csdn.net/qq_42178122/article/details/123932394

版权

tensorrt 专栏收录该内容

8 篇文章 4 订阅

订阅专栏

cuInit-驱动初始化

1.cuInit用于初始化驱动API；如果不先调用它，其他驱动API调用都会返回错误（CUDA_ERROR_NOT_INITIALIZED）。全局只需执行一次
2.不需要cuDestroy，程序会自动销毁并释放

获取驱动版本、设备名称

1.cuDriverGetVersion(&driver_version)：运行cuDriverGetVersion，驱动版本赋值给driver_version
2.cuDeviceGetName(device_name, sizeof(device_name), device)：运行cuDeviceGetName，即可获取设备名称，结果写入device_name

在qt中实战一

在pro中，

#cuda cudnn
INCLUDEPATH += /usr/local/cuda/include
LIBS += -L/usr/local/cuda/lib64
LIBS += -lcuda  #要添加这个哦，不然会出现error: undefined reference to `cuInit'的错误

  #include <stdio.h>
    #include <stdlib.h>
    #include <cuda.h>

    /*
     * Entry point: initialises the CUDA driver API and prints the driver
     * version reported by cuDriverGetVersion.
     *
     * Returns 0 on success. Exits with EXIT_FAILURE if cuInit fails and
     * returns EXIT_FAILURE if the version query fails.
     */
    int main(int argc, char const *argv[])
    {
        // Command-line arguments are not used by this example.
        (void)argc;
        (void)argv;

        // 1. The driver API must be initialised before any other driver call.
        CUresult re = cuInit(0);
        if (re != CUDA_SUCCESS){
            printf("cuda设备初始化失败！\n");
            exit(EXIT_FAILURE);
        }
        printf("cuda设备初始化成功！\n");

        // 2. Query the driver version.
        int cu_version = 0;
        re = cuDriverGetVersion(&cu_version);
        if (re != CUDA_SUCCESS){
            printf("获取版本失败！\n");
            // Report the failure through the exit status as well.
            return EXIT_FAILURE;
        }
        printf("获取版本是：%d\n", cu_version);
        return 0;
    }

参考：https://www.jianshu.com/p/15dc69405791?from=timeline

上下文管理设置

1.context是一种上下文，管理对gpu上所有的操作
2.一般来说，一个线程基本固定访问一个显卡，且只使用一个context
3.使用cuDevicePrimaryCtxRetain，为设备关联主context，分配、释放、设置（第一次调用cuDevicePrimaryCtxRetain，则会生成新的context；往后的调用都会用第一次调用的context）

在qt中实战二

// CUDA驱动头文件cuda.h
#include <cuda.h>   // include <> 和 "" 的区别    
#include <stdio.h>  // include <> : 标准库文件 
#include <string.h> // include "" : 自定义文件  详细情况请查看 readme.md -> 5

// Wraps a driver API call: evaluates `op`, and on failure prints the call
// text together with the file and line of the call site. Expands to the
// bool returned by __check_cuda_driver (true on success).
#define checkDriver(op)  __check_cuda_driver((op), #op, __FILE__, __LINE__)

// Checks the result of a CUDA driver API call.
//
// code: result returned by the driver call
// op:   source text of the call (stringified by checkDriver)
// file: source file of the call site
// line: source line of the call site
//
// Returns true when code is CUDA_SUCCESS; otherwise prints a diagnostic to
// stdout and returns false.
bool __check_cuda_driver(CUresult code, const char* op, const char* file, int line){
    if(code == CUresult::CUDA_SUCCESS){
        return true;
    }

    const char* err_name = nullptr;
    const char* err_message = nullptr;

    // For an unrecognised code the lookup fails and leaves the pointer NULL;
    // passing NULL to "%s" is undefined behaviour, so substitute a placeholder.
    if(cuGetErrorName(code, &err_name) != CUresult::CUDA_SUCCESS || err_name == nullptr){
        err_name = "<unknown error>";
    }
    if(cuGetErrorString(code, &err_message) != CUresult::CUDA_SUCCESS || err_message == nullptr){
        err_message = "<no description available>";
    }

    printf("%s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
    return false;
}

int main(){
     // 1. 任何驱动API调用之前必须先初始化
     CUresult re = cuInit(0);
     CUdevice device = 0;
    // 使用cuDevicePrimaryCtxRetain获取与设备关联的context
     CUcontext ctxA = nullptr;  
    checkDriver(cuDevicePrimaryCtxRetain(&ctxA, device));       // 在 device 上指定一个地址对ctxA进行管理
    printf("ctxA = %p\n", ctxA);
    checkDriver(cuDevicePrimaryCtxRelease(device));
    return 0;
}

使用驱动API进行内存分配

// CUDA驱动头文件cuda.h
#include <cuda.h>

#include <stdio.h>
#include <string.h>

// Wraps a driver API call: evaluates `op`, and on failure prints the call
// text together with the file and line of the call site. Expands to the
// bool returned by __check_cuda_driver (true on success).
#define checkDriver(op)  __check_cuda_driver((op), #op, __FILE__, __LINE__)

// Checks the result of a CUDA driver API call.
//
// code: result returned by the driver call
// op:   source text of the call (stringified by checkDriver)
// file: source file of the call site
// line: source line of the call site
//
// Returns true when code is CUDA_SUCCESS; otherwise prints a diagnostic to
// stdout and returns false.
bool __check_cuda_driver(CUresult code, const char* op, const char* file, int line){

    if(code == CUresult::CUDA_SUCCESS){
        return true;
    }

    const char* err_name = nullptr;
    const char* err_message = nullptr;

    // For an unrecognised code the lookup fails and leaves the pointer NULL;
    // passing NULL to "%s" is undefined behaviour, so substitute a placeholder.
    if(cuGetErrorName(code, &err_name) != CUresult::CUDA_SUCCESS || err_name == nullptr){
        err_name = "<unknown error>";
    }
    if(cuGetErrorString(code, &err_message) != CUresult::CUDA_SUCCESS || err_message == nullptr){
        err_message = "<no description available>";
    }

    printf("%s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
    return false;
}

int main(){

    // 检查cuda driver的初始化
    checkDriver(cuInit(0));

    // 创建上下文
    CUcontext context = nullptr;
    CUdevice device = 0;
    checkDriver(cuCtxCreate(&context, CU_CTX_SCHED_AUTO, device));
    printf("context = %p\n", context);

    // 输入device prt向设备要一个100 byte的线性内存，并返回地址
    CUdeviceptr device_memory_pointer = 0;
    checkDriver(cuMemAlloc(&device_memory_pointer, 100)); // 注意这是指向device的pointer, 
    printf("device_memory_pointer = %p\n", device_memory_pointer);

    // 输入二级指针向host要一个100 byte的锁页内存，专供设备访问
    float* host_page_locked_memory = nullptr;
    checkDriver(cuMemAllocHost((void**)&host_page_locked_memory, 100));
    printf("host_page_locked_memory = %p\n", host_page_locked_memory);

    // 向page-locked memory 里放数据（仍在CPU上），可以让GPU可快速读取
    host_page_locked_memory[0] = 123;
    printf("host_page_locked_memory[0] = %f\n", host_page_locked_memory[0]);

    
    float new_value = 555;
    checkDriver(cuMemsetD32((CUdeviceptr)host_page_locked_memory, *(int*)&new_value, 1)); 
    printf("host_page_locked_memory[0] = %f\n", host_page_locked_memory[0]);
    // 释放内存
    checkDriver(cuMemFreeHost(host_page_locked_memory));
    return 0;
}