带错误检测的CUDA资源管理

用户名溢出

已于 2023-07-01 11:17:50 修改

阅读量138

点赞数

分类专栏： Cuda 文章标签：开发语言 c++

于 2023-04-23 19:30:46 首次发布

本文链接：https://blog.csdn.net/bocai1215/article/details/130328727

版权

Cuda 专栏收录该内容

16 篇文章 2 订阅

订阅专栏

项目中发现很多cuda代码很冗余，主要是有一些cuda内存相关的操作，比如cudaMemcpy之后，要进行错误检测，所以修改了一版，实现接口内部自己检测，这样代码看起来不会那么乱。

之所以使用宏定义而不是函数定义，是因为：如果使用函数，想在出错时直接输出出错位置的文件名和行号，就必须在每次调用接口时把 __FILE__ 和 __LINE__ 作为参数传进去，比较麻烦；而宏会在调用处展开，__FILE__ 和 __LINE__ 自然就对应出错的位置，因此采用了宏定义的方式。

下面是具体代码：

#pragma once
#include <assert.h>
#include <cuda_runtime.h>
#include <string>

// Integer ceiling division: rounds a / b up (intended for non-negative a and positive b).
// Both arguments are fully parenthesized so that expression arguments such as
// iDIV_UP(n, x + y) expand with the intended precedence.
// NOTE: like the other helpers below, arguments are evaluated more than once, so do not
// pass expressions with side effects.
#define iDIV_UP(a, b) (((a) + (b) - 1) / (b))
// Larger / smaller of two values.
#define ALG_MAX(x,y) ((x)>(y)?(x):(y))
#define ALG_MIN(x,y) ((x)<(y)?(x):(y))
// Absolute tolerance used by FLOAT_EQUAL.
#define FLOAT_EPS  1e-6
// True when v1 and v2 differ by less than FLOAT_EPS (uses fabs, so the including file
// needs <math.h> or <cmath>).
#define FLOAT_EQUAL(v1, v2) ((fabs((v1)-(v2))) < (FLOAT_EPS))

// cuBLAS status check.
// Evaluates ErrorID exactly once; when the status is not CUBLAS_STATUS_SUCCESS it prints the
// status string together with the call site's __LINE__/__FILE__ and then hits assert(false).
// Written as a macro so that __FILE__/__LINE__ refer to the caller.
// The status is copied into a local first so that an argument with side effects (for
// example a cuBLAS call passed directly) is not executed a second time on the error path.
// NOTE: assert is compiled out when NDEBUG is defined; in such builds the error is only printed.
// NOTE(review): cublasGetStatusString is not declared by the headers included in this
// file - presumably the including translation unit pulls in the cuBLAS header; confirm.
#define CheckCBlasError(ErrorID)        \
{                                       \
    const auto check_cblas_status_ = (ErrorID);          \
    if(CUBLAS_STATUS_SUCCESS != check_cblas_status_)     \
    {                                   \
        printf("=====Imaging Error CBlas: %s, line: %d of file: %s\n", cublasGetStatusString(check_cblas_status_), __LINE__, __FILE__);\
        assert(false);                  \
    }                                   \
}

// CUDA runtime status check.
// Evaluates ErrorId exactly once; when the status is not cudaSuccess it prints the error
// string together with the call site's __FILE__/__LINE__ and then hits assert(false).
// Written as a macro so that __FILE__/__LINE__ refer to the caller.
// The status is copied into a local first: an argument such as cudaGetLastError() (which
// reads and clears the error) or a direct API call must not run again on the error path.
// The local uses a reserved-looking name so it cannot shadow a caller variable such as
// error_id that is passed in as the argument.
// NOTE: assert is compiled out when NDEBUG is defined; in such builds the error is only printed.
#define CheckCudaError(ErrorId){\
    const auto check_cuda_status_ = (ErrorId);\
    if (cudaSuccess != check_cuda_status_)\
    { \
        printf("=====Imaging Error Cuda: %s, file: %s : %d\n", cudaGetErrorString(check_cuda_status_), __FILE__, __LINE__);assert(false);\
    }\
}

// Release the device memory held by pData and reset the pointer.
// Does nothing when pData is already nullptr. Otherwise it calls cudaFree, sets pData to
// nullptr, and then passes the cudaFree status to CheckCudaError.
// pData must be an assignable pointer variable, because the macro writes nullptr into it.
#define Cuda_Free(pData)                                     \
{                                                            \
    if (nullptr != pData)                                    \
    {                                                        \
        const cudaError_t free_status = cudaFree(pData);     \
        pData = nullptr;                                     \
        CheckCudaError(free_status);                         \
    }                                                        \
}

// Fill device memory with a byte value, with built-in error checking.
//   devPtr : device pointer to fill; a nullptr is skipped without calling cudaMemset.
//   iValue : value handed to cudaMemset (cudaMemset sets memory byte-wise).
//   iSize  : number of bytes to set.
// The null guard tests the pointer value itself. Testing the address of the pointer
// variable (&devPtr) would never be null and therefore would guard nothing.
#define Cuda_Memset(devPtr, iValue, iSize){\
    void* p_cu_memset_data = (void*)(devPtr);\
    if (nullptr != p_cu_memset_data){\
        auto error_id = cudaMemset(p_cu_memset_data, (iValue), (iSize));\
        CheckCudaError(error_id);\
    }\
}

// Device allocation, pointer-to-pointer form: Cuda_Malloc(&ptr, size).
// Frees the buffer currently referenced by *pData (if any), then allocates iSize bytes
// into *pData and checks the cudaMalloc status.
// A null pData is not dereferenced: the free step is skipped and whatever status
// cudaMalloc returns for the null argument is reported through CheckCudaError.
// NOTE: this header defines Cuda_Malloc a second time further down with a different
// calling convention (Cuda_Malloc(ptr, size)); only one of the two definitions should
// be kept, since redefining a macro with a different body is diagnosed by compilers.
#define Cuda_Malloc(pData, iSize){\
    if (nullptr != (pData)){\
        Cuda_Free(*(pData));\
    }\
    cudaError_t error_id = cudaMalloc((pData), (iSize));\
    CheckCudaError(error_id);\
}



// Device allocation, pointer form.
// Plain CUDA usage is cudaMalloc((void**)&ptr, size); with this macro the call is simply
// Cuda_Malloc(ptr, size).
// Any buffer currently held by pData is released first (Cuda_Free resets pData to nullptr),
// then iSize bytes are allocated into pData and the cudaMalloc status is checked.
// pData must be a pointer variable, because its address is taken.
// The #undef drops the earlier pointer-to-pointer definition of the same name so that this
// definition is not a conflicting macro redefinition.
#undef Cuda_Malloc
#define Cuda_Malloc(pData, iSize){\
    Cuda_Free(pData);\
    auto p_cu_malloc_data = (void**)&(pData);\
    cudaError_t error_id = cudaMalloc(p_cu_malloc_data, (iSize));\
    CheckCudaError(error_id);\
}


// cudaMemcpy with built-in error checking.
// Copies iSize bytes from pSrc to pDst using the transfer kind cpyKind and passes the
// returned status to CheckCudaError.
#define Cuda_Memcpy(pDst, pSrc, iSize, cpyKind)                                  \
{                                                                                \
    const cudaError_t memcpy_status = cudaMemcpy(pDst, pSrc, iSize, cpyKind);    \
    CheckCudaError(memcpy_status);                                               \
}

第二版本

项目中发现，显存的使用情况统计起来比较麻烦，需要自己手动去计算使用了多少，因此在上一版本的基础上进行了部分优化，能够初步实现在申请的时候就记录申请了多少显存。

头文件：

#pragma once
#include <assert.h>
#include <cuda_runtime.h>
#include <string>
#include <map>
#include <thread>
#include <mutex>
#include <sstream>
#include <fstream>

// Integer ceiling division: rounds a / b up (intended for non-negative a and positive b).
// Both arguments are fully parenthesized so that expression arguments such as
// iDIV_UP(n, x + y) expand with the intended precedence.
// NOTE: like the other helpers below, arguments are evaluated more than once, so do not
// pass expressions with side effects.
#define iDIV_UP(a, b) (((a) + (b) - 1) / (b))
// Larger / smaller of two values.
#define ALG_MAX(x,y) ((x)>(y)?(x):(y))
#define ALG_MIN(x,y) ((x)<(y)?(x):(y))
// Absolute tolerance used by FLOAT_EQUAL.
#define FLOAT_EPS  1e-6
// True when v1 and v2 differ by less than FLOAT_EPS (uses fabs, so the including file
// needs <math.h> or <cmath>).
#define FLOAT_EQUAL(v1, v2) ((fabs((v1)-(v2))) < (FLOAT_EPS))

//-------------------
// Device-memory usage tracking
// ------------------
// Allocation registry, two levels deep:
//   thread id -> (device address printed as text -> ("file:line" of the Cuda_Malloc call
//   site, requested size)).
// Entries are added by Cuda_Malloc and removed by Cuda_Free, both for the calling thread only.
extern std::map<std::thread::id, std::map<std::string, std::pair<std::string, size_t>>> gAlgCudaMemCheckMap;
// Mutex taken around every access to gAlgCudaMemCheckMap.
extern std::mutex gAlgCudaMemCheckMutex;
// Master switch: the tracking code in Cuda_Malloc / Cuda_Free / OutCudaMemoryMessage only
// runs while this flag is true.
extern bool gbAlgCudaCheckFlag;
// Appends the entries currently registered for each thread to a per-thread text file
// (see the definition for the exact format).
void OutCudaMemoryMessage();

// cuBLAS status check.
// Evaluates ErrorID exactly once; when the status is not CUBLAS_STATUS_SUCCESS it prints the
// status string together with the call site's __LINE__/__FILE__ and then hits assert(false).
// Written as a macro so that __FILE__/__LINE__ refer to the caller.
// The status is copied into a local first so that an argument with side effects (for
// example a cuBLAS call passed directly) is not executed a second time on the error path.
// NOTE: assert is compiled out when NDEBUG is defined; in such builds the error is only printed.
// NOTE(review): cublasGetStatusString is not declared by the headers included in this
// file - presumably the including translation unit pulls in the cuBLAS header; confirm.
#define CheckCBlasError(ErrorID)        \
{                                       \
    const auto check_cblas_status_ = (ErrorID);          \
    if(CUBLAS_STATUS_SUCCESS != check_cblas_status_)     \
    {                                   \
        printf("=====Imaging Error CBlas: %s, line: %d of file: %s\n", cublasGetStatusString(check_cblas_status_), __LINE__, __FILE__);\
        assert(false);                  \
    }                                   \
}


// CUDA runtime status check.
// Evaluates ErrorId exactly once; when the status is not cudaSuccess it prints the error
// string together with the call site's __FILE__/__LINE__ and then hits assert(false).
// Written as a macro so that __FILE__/__LINE__ refer to the caller.
// The status is copied into a local first: an argument such as cudaGetLastError() (which
// reads and clears the error) or a direct API call must not run again on the error path.
// The local uses a reserved-looking name so it cannot shadow a caller variable such as
// error_id that is passed in as the argument.
// NOTE: assert is compiled out when NDEBUG is defined; in such builds the error is only printed.
#define CheckCudaError(ErrorId){\
    const auto check_cuda_status_ = (ErrorId);\
    if (cudaSuccess != check_cuda_status_)\
    { \
        printf("=====Imaging Error Cuda: %s, file: %s : %d\n", cudaGetErrorString(check_cuda_status_), __FILE__, __LINE__);assert(false);\
    }\
}


// Release the device memory held by pData, reset the pointer, and update the usage tracker.
// Steps, all skipped when pData is nullptr:
//   1. If gbAlgCudaCheckFlag is set, remove the record for this address from the calling
//      thread's section of gAlgCudaMemCheckMap (under gAlgCudaMemCheckMutex).
//   2. cudaFree the buffer, set pData to nullptr, and check the cudaFree status.
// pData must be an assignable pointer variable (its address is taken and it is overwritten).
// Every line of the macro body ends with a continuation backslash; a missing one ends the
// macro early and leaves the remaining lines as stray top-level code.
// NOTE: records are looked up by the calling thread's id, so a buffer freed on a different
// thread than the one that allocated it keeps its record.
#define Cuda_Free(pData){ \
    /* Usage tracking: forget the record for this address before it is freed. */ \
    if (gbAlgCudaCheckFlag && nullptr != pData){                           \
        auto p_cu_malloc_data = (void**)&pData;                            \
        std::lock_guard<std::mutex> lock(gAlgCudaMemCheckMutex);           \
        auto iter = gAlgCudaMemCheckMap.find(std::this_thread::get_id());  \
        if (iter != gAlgCudaMemCheckMap.end())                             \
        {   std::stringstream ss;                                          \
            /* The map key is the device address printed as text. */      \
            ss << *p_cu_malloc_data;                                       \
            auto iter_ptr = iter->second.find(ss.str());                   \
            if (iter_ptr != iter->second.end())                            \
                iter->second.erase(iter_ptr);                              \
        }                                                                  \
    }                                                                      \
    if (nullptr != pData){                      \
        cudaError_t error_id = cudaFree(pData); \
        pData = nullptr;                        \
        CheckCudaError(error_id);               \
    }                                           \
}


// Fill device memory with a byte value, with built-in error checking.
//   devPtr : device pointer to fill; a nullptr is skipped without calling cudaMemset.
//   iValue : value handed to cudaMemset (cudaMemset sets memory byte-wise).
//   iSize  : number of bytes to set.
// The null guard tests the pointer value itself. Testing the address of the pointer
// variable (&devPtr) would never be null and therefore would guard nothing.
#define Cuda_Memset(devPtr, iValue, iSize){\
    void* p_cu_memset_data = (void*)(devPtr);\
    if (nullptr != p_cu_memset_data){\
        auto error_id = cudaMemset(p_cu_memset_data, (iValue), (iSize));\
        CheckCudaError(error_id);\
    }\
}


// Device allocation with error checking and usage tracking: Cuda_Malloc(ptr, size).
// Any buffer currently held by pData is released first (Cuda_Free resets pData to nullptr),
// then iSize bytes are allocated into pData and the cudaMalloc status is checked.
// When gbAlgCudaCheckFlag is set and the allocation succeeded, the new address is recorded
// in the calling thread's section of gAlgCudaMemCheckMap together with the call site
// ("file:line") and the requested size. Failed allocations are not recorded, which matters
// in builds where the assert inside CheckCudaError is compiled out.
// pData must be a pointer variable, because its address is taken.
// Every line of the macro body ends with a continuation backslash; a missing one ends the
// macro early and leaves the remaining lines as stray top-level code.
#define Cuda_Malloc(pData, iSize){\
    Cuda_Free(pData);\
    auto p_cu_malloc_data = (void**)&pData;\
    cudaError_t error_id = cudaMalloc(p_cu_malloc_data, iSize);\
    CheckCudaError(error_id);\
    /* Usage tracking: remember where this buffer was allocated and how large it is. */ \
    if (gbAlgCudaCheckFlag && cudaSuccess == error_id)              \
    {                                                               \
        std::lock_guard<std::mutex> lock(gAlgCudaMemCheckMutex);    \
        std::stringstream ss;                                       \
        /* The map key is the device address printed as text. */   \
        ss << *p_cu_malloc_data;                                    \
        gAlgCudaMemCheckMap[std::this_thread::get_id()][ss.str()] = \
            std::make_pair(__FILE__ + std::string(":") + std::to_string(__LINE__), (size_t)(iSize));\
    }\
}


// cudaMemcpy with built-in error checking.
// Copies iSize bytes from pSrc to pDst using the transfer kind cpyKind and passes the
// returned status to CheckCudaError.
#define Cuda_Memcpy(pDst, pSrc, iSize, cpyKind)                                  \
{                                                                                \
    const cudaError_t memcpy_status = cudaMemcpy(pDst, pSrc, iSize, cpyKind);    \
    CheckCudaError(memcpy_status);                                               \
}

.cu文件

#include "cudaCommon.cuh"

// Allocation registry: thread id -> (key string -> (string, size_t)).
// OutCudaMemoryMessage prints the pair as "File: <first> : <second>".
// NOTE(review): presumably the key is the device address as text and the pair is the
// allocation call site plus byte size, filled in by the Cuda_Malloc macro - confirm in the header.
std::map<std::thread::id, std::map<std::string, std::pair<std::string, size_t>>> gAlgCudaMemCheckMap;
// Mutex locked by OutCudaMemoryMessage while it walks and clears gAlgCudaMemCheckMap.
std::mutex gAlgCudaMemCheckMutex;
// Tracking switch, off by default; OutCudaMemoryMessage returns immediately while it is false.
bool gbAlgCudaCheckFlag = false;

// Dump the allocations still registered in gAlgCudaMemCheckMap, one report file per thread.
// Does nothing unless gbAlgCudaCheckFlag is set. While holding gAlgCudaMemCheckMutex, for
// every thread that has at least one registered entry:
//   - opens "<thread id>MemInfo.txt" in append mode,
//   - writes three separator lines followed by one "File: <location> : <size>" line per entry,
//   - clears that thread's entries.
// If the file cannot be opened, the thread's entries are left untouched.
void OutCudaMemoryMessage()
{
    if (gbAlgCudaCheckFlag)
    {
        std::lock_guard<std::mutex> guard(gAlgCudaMemCheckMutex);
        for (auto it = gAlgCudaMemCheckMap.begin(); it != gAlgCudaMemCheckMap.end(); ++it)
        {
            auto& records = it->second;
            if (records.empty())
            {
                continue;
            }

            // The report file name is the textual thread id followed by a fixed suffix.
            std::stringstream name_builder;
            name_builder << it->first << "MemInfo.txt";

            // The stream is closed by its destructor at the end of each iteration.
            std::ofstream report(name_builder.str(), std::ios::app);
            if (!report.is_open())
            {
                continue;
            }

            for (int banner = 0; banner < 3; ++banner)
            {
                report << "===================================\n";
            }
            for (const auto& record : records)
            {
                const auto& info = record.second;
                report << "File: " << info.first << " : " << info.second << "\n";
            }
            records.clear();
        }
    }
}

如果哪个小伙伴知道更好的实现方式，或者有现成的工具方便统计的话，希望能留言告知交流，感谢