项目场景:onnxruntime 报错 [with ERRTYPE = cudaError] CUDA failure 999: unknown error

在将CUDA从10.2升级到11.2并使用onnxruntime-gpu 1.10及Python 3.9.7时遇到CUDA failure 999错误。错误源于旧驱动未完全卸载。解决方案包括卸载旧版CUDA,清理遗留驱动,然后安装最新驱动。

项目场景:[with ERRTYPE = cudaError; bool THRW = true] CUDA failure 999: unknown error ; GPU=24

需要升级一个旧程序,该程序此前使用的 CUDA 版本是 10.2。


问题描述:

环境

cuda 11.2 (之前是10.2)

onnxruntime-gpu 1.10

python 3.9.7

在这里插入图片描述

启动程序的时候

Traceback (most recent call last):
  File "/home/aiuser/cover/liheng-foggun/app.py", line 15, in <module>
    model = DetectMultiBackend(weights=config.paddle.model_file)
  File "/home/aiuser/miniconda3/envs/cover/lib/python3.9/site-packag
(yolo8) root@jetlcc-desktop:/home/jetlcc/len/tools/yolov8# yolo predict model=yolov8n.onnx source="ultralytics/assets/bus.jpg" WARNING ⚠️ Unable to automatically guess model task, assuming 'task=detect'. Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'. Ultralytics 8.3.167 🚀 Python-3.8.20 torch-1.11.0a0+gitbc2c6ed CUDA:0 (NVIDIA Tegra X1, 1972MiB) Loading yolov8n.onnx for ONNX Runtime inference... Using ONNX Runtime CUDAExecutionProvider 2025-07-17 10:46:27.513092611 [E:onnxruntime:, inference_session.cc:1588 operator()] Exception during initialization: /home/onnxruntime/onnxruntime/onnxruntime/core/providers/cuda/cuda_call.cc:122 bool onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*) [with ERRTYPE = cudnnStatus_t; bool THRW = true] /home/onnxruntime/onnxruntime/onnxruntime/core/providers/cuda/cuda_call.cc:116 bool onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*) [with ERRTYPE = cudnnStatus_t; bool THRW = true] CUDNN failure 1: CUDNN_STATUS_NOT_INITIALIZED ; GPU=0 ; hostname=jetlcc-desktop ; expr=cudnnCreate(&cudnn_handle_); Traceback (most recent call last): File "/root/miniconda3/envs/yolo8/bin/yolo", line 8, in <module> sys.exit(entrypoint()) File "/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/ultralytics/cfg/__init__.py", line 983, in entrypoint getattr(model, mode)(**overrides) # default args from model File "/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/ultralytics/engine/model.py", line 548, in predict self.predictor.setup_model(model=self.model, verbose=is_cli) File "/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/ultralytics/engine/predictor.py", line 391, in setup_model self.model = AutoBackend( File "/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context return func(*args, **kwargs) File 
"/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/ultralytics/nn/autobackend.py", line 259, in __init__ session = onnxruntime.InferenceSession(w, providers=providers) File "/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 335, in __init__ self._create_inference_session(providers, provider_options, disabled_optimizers) File "/root/miniconda3/envs/yolo8/lib/python3.8/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 381, in _create_inference_session sess.initialize_session(providers, provider_options, disabled_optimizers) onnxruntime.capi.onnxruntime_pybind11_state.RuntimeException: [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Exception during initialization: /home/onnxruntime/onnxruntime/onnxruntime/core/providers/cuda/cuda_call.cc:122 bool onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*) [with ERRTYPE = cudnnStatus_t; bool THRW = true] /home/onnxruntime/onnxruntime/onnxruntime/core/providers/cuda/cuda_call.cc:116 bool onnxruntime::CudaCall(ERRTYPE, const char*, const char*, ERRTYPE, const char*) [with ERRTYPE = cudnnStatus_t; bool THRW = true] CUDNN failure 1: CUDNN_STATUS_NOT_INITIALIZED ; GPU=0 ; hostname=jetlcc-desktop ; expr=cudnnCreate(&cudnn_handle_); (yolo8) root@jetlcc-desktop:/home/jetlcc/len/tools/yolov8#
07-18
#include "stdafx.h"
#include "LPEAlgoriAPI.h"
#include "hedyErrHandler.h"
#include "LPEAWithNRandAEC.h"
#include "LPEAlgoriNameDef.h"

#include <vector>

using namespace HEDYDMPL;

// Shorthand for the processing instance handed out by the instance manager.
// Every use re-evaluates multiInstManager().
#define pLPE (LPEAWithNRandAEC::multiInstManager())

// Purpose:
//   Import the image data and image information to be processed. Per the
//   original header comment, only 16-bit unsigned data is supported.
// Parameters:
//   pInImg      16-bit unsigned image to process
//   nWidth      image width
//   nHeight     image height
//   strVersion  algorithm library version (only written to the log)
//   nLevelNum   number of pyramid levels
//   fDESpacing  detector pixel size
//   fAECThres   breast mean value configured for AEC
//   fPreBkg     forwarded to init(); NOTE(review): the original comment
//               documented this slot as "detector maximum value" (fPreMaxi) -
//               confirm the current meaning
//   fNewBkg     forwarded to init(); NOTE(review): the original comment
//               documented this slot as "normalized maximum value" (fNewMaxi) -
//               confirm the current meaning
// Returns:
//   SUCCESS when initialization succeeds, INNER_ERR otherwise.
ERRTYPE LPEALGORI_API ImportAlgoriInfo(
    PWORD pInImg,
    INT nWidth,
    INT nHeight,
    CString strVersion,
    INT nLevelNum,
    FLOAT fDESpacing,
    FLOAT fAECThres,
    FLOAT fPreBkg,
    FLOAT fNewBkg)
{
    HEDYERRHANDLER_logFile(
        "================= HEDY_MAMMO Module start. =================");

    // Ask the API how many bytes the converted string needs (terminator
    // included, because the source length is given as -1). The character
    // count of the wide string is not a safe buffer size: one wide character
    // may expand to several OEM bytes, and the terminator needs a slot too.
    const int nBytes = WideCharToMultiByte(
        CP_OEMCP, 0, strVersion, -1, NULL, 0, NULL, NULL);
    std::vector<char> versionBuf(nBytes > 0 ? static_cast<size_t>(nBytes) : 1, '\0');
    if (nBytes > 0) {
        WideCharToMultiByte(
            CP_OEMCP, 0, strVersion, -1, &versionBuf[0], nBytes, NULL, NULL);
    }
    // Guarantee termination even if the conversion failed part-way.
    versionBuf[versionBuf.size() - 1] = '\0';
    HEDYERRHANDLER_logFile("Version: %s", &versionBuf[0]);

    if (pLPE.init(pInImg, nWidth, nHeight, nLevelNum,
                  fDESpacing, fAECThres, fPreBkg, fNewBkg)) {
        pLPE.destroy();
        HEDYERRHANDLER_logFile("Error in function ImportAlgoriInfo.");
        HEDYERRHANDLER_errLogFile(INNER_ERR, "Initialization failed!");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return INNER_ERR;
    }
    return SUCCESS;
}

// Purpose:
//   Run the pre-processing chain: background/object segmentation, gray-value
//   normalization, optional gray-value transform, and size pre-processing.
// Parameters:
//   isMark, nMarkX, nMarkY, nMarkH, nMarkW
//               forwarded unchanged to segBkgObj(); presumably a marker
//               rectangle - TODO confirm against LPEAWithNRandAEC
//   strOprType  gray-value mapping curve: "None", "Log", "ModLog" or "Sqrt"
//   pfParamList curve parameters; only read for "Sqrt", where [0] is passed as
//               the tissue threshold and [1] as the AEC threshold
// Returns:
//   SUCCESS on success; PARAM_ERR for an unknown curve keyword or a missing
//   parameter list; INNER_ERR when a processing step fails.
ERRTYPE LPEALGORI_API SetPreProcPara(
    BOOL isMark,
    INT nMarkX,
    INT nMarkY,
    INT nMarkH,
    INT nMarkW,
    CString strOprType,
    PFLOAT pfParamList)
{
    // 1. Segment background and object.
    if (pLPE.segBkgObj(isMark, nMarkX, nMarkY, nMarkH, nMarkW)) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(INNER_ERR, "segBkgObj failed");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return INNER_ERR;
    }

    // 2. Normalize the pixel gray values.
    if (pLPE.grayValueNormalization()) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(INNER_ERR, "grayValueNormalization failed.");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return INNER_ERR;
    }

    // 3. Apply the selected gray-value transform.
    ERRTYPE errHandler = SUCCESS;
    // 3.1 Log curve.
    if (strOprType.Compare(_T("Log")) == 0) {
        errHandler = pLPE.grayValueTransformLog();
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "grayValueTransformLog failed");
        }
    }
    // 3.2 ModLog curve.
    else if (strOprType.Compare(_T("ModLog")) == 0) {
        errHandler = pLPE.grayValueTransformModLog();
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "grayValueTransformModLog failed");
        }
    }
    // 3.3 Sqrt curve (the only branch that reads pfParamList).
    else if (strOprType.Compare(_T("Sqrt")) == 0) {
        if (pfParamList == NULL) {
            errHandler = PARAM_ERR;
            HEDYERRHANDLER_errLogFile(
                PARAM_ERR, "Sqrt operator requires a parameter list!");
        }
        else {
            FLOAT fSqrtTissueThres = pfParamList[0];
            FLOAT fSqrtAECThres = pfParamList[1];
            errHandler =
                pLPE.grayValueTransformSqrt(fSqrtTissueThres, fSqrtAECThres);
            if (errHandler) {
                HEDYERRHANDLER_errLogFile(
                    INNER_ERR, "grayValueTransformSqrt failed");
            }
        }
    }
    // 3.4 No gray-value transform.
    else if (strOprType.Compare(_T("None")) == 0) {
        HEDYERRHANDLER_logFile("No grayValueTransform.");
    }
    // 3.5 Unknown keyword.
    else {
        errHandler = PARAM_ERR;
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "Incorrect grayValueTransform operator!");
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "None, Log, ModLog, Sqrt operator are valid!");
    }
    if (errHandler) {
        pLPE.destroy();
        HEDYERRHANDLER_logFile("Error in function SetPreProcPara.");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        // Report parameter errors as PARAM_ERR, consistent with SetDeconPara
        // and SetReconPara; every other failure stays INNER_ERR.
        return (errHandler == PARAM_ERR) ? PARAM_ERR : INNER_ERR;
    }

    // 4. Pre-process the image size.
    if (pLPE.preProc()) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(INNER_ERR, "preProcessing failed");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return INNER_ERR;
    }
    return SUCCESS;
}

// Purpose:
//   Translate the per-level enhancement keywords into ENHANCEOPRTYPE values
//   and run the decomposition step (performLPD).
// Parameters:
//   nLev           number of entries in pstrEnOprType (the original comment
//                  calls it a "level index", but the code uses it as a count)
//   pstrEnOprType  per-level enhancement keyword: "LogOpr", "SigmoidOpr" or
//                  "HarmoOpr"
//   ppfParamList   enhancement function parameter lists (forwarded)
//   pfWDetails     detail fine-tuning parameters (forwarded)
//   nDetailLev     image detail weight parameter (forwarded)
// Returns:
//   SUCCESS on success; PARAM_ERR for invalid arguments or an unknown
//   keyword; INNER_ERR when performLPD fails.
ERRTYPE LPEALGORI_API SetDeconPara(
    INT nLev,
    CString* pstrEnOprType,
    PFLOAT* ppfParamList,
    PFLOAT pfWDetails,
    INT nDetailLev)
{
    // Reject arguments that would otherwise lead to a bad allocation size or
    // a NULL dereference in the loop below.
    if (nLev <= 0 || pstrEnOprType == NULL) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "Invalid level count or enhance operator list");
        HEDYERRHANDLER_logFile("Error in function SetDeconPara");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return PARAM_ERR;
    }

    // The vector owns the temporary array, so every exit path releases it.
    std::vector<ENHANCEOPRTYPE> enOprTypes(static_cast<size_t>(nLev));
    for (int n = 0; n < nLev; ++n) {
        // 1. Select the edge-image enhancement function.
        // 1.1 Log enhancement.
        if (pstrEnOprType[n].Compare(_T("LogOpr")) == 0) {
            enOprTypes[n] = LOG_OPR;
        }
        // 1.2 Sigmoid enhancement.
        else if (pstrEnOprType[n].Compare(_T("SigmoidOpr")) == 0) {
            enOprTypes[n] = SIGMOID_OPR;
        }
        // 1.3 HarmoS enhancement.
        else if (pstrEnOprType[n].Compare(_T("HarmoOpr")) == 0) {
            enOprTypes[n] = HARMO_OPR;
        }
        // 1.4 Unknown keyword.
        else {
            pLPE.destroy();
            HEDYERRHANDLER_errLogFile(PARAM_ERR, "Incorrect enhance operator");
            HEDYERRHANDLER_errLogFile(PARAM_ERR,
                "LogOpr,SigmoidOpr, HarmoOpr are valid!");
            HEDYERRHANDLER_logFile("Error in function SetDeconPara");
            HEDYERRHANDLER_logFile("Mammography enhancement failed.");
            HEDYERRHANDLER_logFile(
                "================= HEDY_MAMMO Module ended. =================\n\n");
            return PARAM_ERR;
        }
    }

    if (pLPE.performLPD(&enOprTypes[0], ppfParamList, pfWDetails, nDetailLev)) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(INNER_ERR, "Image decomposition failed");
        HEDYERRHANDLER_logFile("Error in function SetDeconPara");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return INNER_ERR;
    }
    return SUCCESS;
}

// Purpose:
//   Translate the per-level DC-compression keywords into CURVETYPE values and
//   run the reconstruction step (performLPRecon).
// Parameters:
//   nLev           number of entries in pStrCurveType (the original comment
//                  calls it a "level index", but the code uses it as a count)
//   pStrCurveType  per-level curve keyword: "" (none), "Hybrid", "InvLog",
//                  "ModLog", "Sigmoid", "SLinear" or "HarmoS"
//   ppfParamList   synthesis function parameter lists (forwarded)
//   pfWDepthes     depth fine-tuning parameters (forwarded)
//   nDepthLev      image depth weight parameter (forwarded)
//   nMicroCaLev    forwarded to performLPRecon(); the original comment describes
//                  a calcification enhancement weight (fwMicroCa) in this slot -
//                  TODO confirm
// Returns:
//   SUCCESS on success; PARAM_ERR for invalid arguments or an unknown
//   keyword; INNER_ERR when performLPRecon fails.
ERRTYPE LPEALGORI_API SetReconPara(
    INT nLev,
    CString* pStrCurveType,
    PFLOAT* ppfParamList,
    PFLOAT pfWDepthes,
    INT nDepthLev,
    INT nMicroCaLev)
{
    // Reject arguments that would otherwise lead to a bad allocation size or
    // a NULL dereference in the loop below.
    if (nLev <= 0 || pStrCurveType == NULL) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "Invalid level count or DC operator list");
        HEDYERRHANDLER_logFile("Error in function SetReconPara");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return PARAM_ERR;
    }

    // The vector owns the temporary array, so every exit path releases it.
    std::vector<CURVETYPE> dcOprTypes(static_cast<size_t>(nLev));
    for (int n = 0; n < nLev; ++n) {
        // 1. Select the DC-component compression curve.
        // 1.1 No DC compression.
        if (pStrCurveType[n].Compare(_T("")) == 0) {
            dcOprTypes[n] = NULL_CUR;
        }
        // 1.2 Hybrid curve.
        else if (pStrCurveType[n].Compare(_T("Hybrid")) == 0) {
            dcOprTypes[n] = HYBRID_CUR;
        }
        // 1.3 InvLog curve.
        else if (pStrCurveType[n].Compare(_T("InvLog")) == 0) {
            dcOprTypes[n] = INVLOG_CUR;
        }
        // 1.4 ModLog curve.
        else if (pStrCurveType[n].Compare(_T("ModLog")) == 0) {
            dcOprTypes[n] = MODLOG_CUR;
        }
        // 1.5 Sigmoid curve.
        else if (pStrCurveType[n].Compare(_T("Sigmoid")) == 0) {
            dcOprTypes[n] = SIGMOID_CUR;
        }
        // 1.6 SLinear curve.
        else if (pStrCurveType[n].Compare(_T("SLinear")) == 0) {
            dcOprTypes[n] = SLINEAR_CUR;
        }
        // 1.7 HarmoS curve.
        else if (pStrCurveType[n].Compare(_T("HarmoS")) == 0) {
            dcOprTypes[n] = HARMO_CUR;
        }
        // 1.8 Unknown keyword.
        else {
            pLPE.destroy();
            HEDYERRHANDLER_errLogFile(PARAM_ERR, "Incorrect DC operator");
            HEDYERRHANDLER_errLogFile(PARAM_ERR,
                "Hybrid, InvLog, ModLog, Sigmoid, SLinear and HarmoS are valid!");
            HEDYERRHANDLER_logFile("Error in function SetReconPara");
            HEDYERRHANDLER_logFile("Mammography enhancement failed.");
            HEDYERRHANDLER_logFile(
                "================= HEDY_MAMMO Module ended. =================\n\n");
            return PARAM_ERR;
        }
    }

    if (pLPE.performLPRecon(
            &dcOprTypes[0], ppfParamList, pfWDepthes, nDepthLev, nMicroCaLev)) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(INNER_ERR, "Image reconstruction failed");
        HEDYERRHANDLER_logFile("Error in function SetReconPara");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return INNER_ERR;
    }
    return SUCCESS;
}

// Purpose:
//   Apply the selected post-processing mapping curve.
// Parameters:
//   curveType    post-processing keyword: "" (none), "LinearOpr",
//                "SigmoidOpr", "AdaptSigmoidOpr", "ModSigmoidOpr", "LogOpr"
//                or "HarmoOpr"
//   pfParamList  curve parameters; the number of leading entries read depends
//                on the curve (Linear 2, Sigmoid 6, AdaptSigmoid 2,
//                ModSigmoid 6, Log 4, Harmo 7). Must not be NULL unless
//                curveType is "".
// Returns:
//   SUCCESS on success; PARAM_ERR for an unknown keyword or a missing
//   parameter list; INNER_ERR when the curve function fails.
ERRTYPE LPEALGORI_API SetPostProcPara(
    CString curveType,
    PFLOAT pfParamList)
{
    ERRTYPE errHandler = SUCCESS;
    // 1. No post-processing curve.
    if (curveType.Compare(_T("")) == 0) {
        HEDYERRHANDLER_logFile("No postProc performed.");
    }
    // Every remaining curve reads pfParamList, so reject NULL up front.
    else if (pfParamList == NULL) {
        errHandler = PARAM_ERR;
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "postProc operator requires a parameter list!");
    }
    // 2. Linear curve.
    else if (curveType.Compare(_T("LinearOpr")) == 0) {
        errHandler = pLPE.postProcLinear(pfParamList[0], pfParamList[1]);
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "postProcLinear failed");
        }
    }
    // 3. Sigmoid curve.
    else if (curveType.Compare(_T("SigmoidOpr")) == 0) {
        errHandler = pLPE.postProcSigmoid(
            pfParamList[0], pfParamList[1], pfParamList[2],
            pfParamList[3], pfParamList[4], pfParamList[5]);
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "postProcSigmoid failed");
        }
    }
    // 4. Adaptive sigmoid curve.
    else if (curveType.Compare(_T("AdaptSigmoidOpr")) == 0) {
        errHandler = pLPE.postProcAdaptiveSigmoid(
            pfParamList[0], pfParamList[1]);
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "postProcAdaptiveSigmoid failed");
        }
    }
    // 5. Modified sigmoid curve.
    else if (curveType.Compare(_T("ModSigmoidOpr")) == 0) {
        errHandler = pLPE.postProcModSigmoid(
            pfParamList[0], pfParamList[1], pfParamList[2],
            pfParamList[3], pfParamList[4], pfParamList[5]);
        if (errHandler) {
            // Name the function that actually failed (the message used to
            // say postProcSigmoid).
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "postProcModSigmoid failed");
        }
    }
    // 6. Log curve.
    else if (curveType.Compare(_T("LogOpr")) == 0) {
        errHandler = pLPE.postProcLog(
            pfParamList[0], pfParamList[1], pfParamList[2], pfParamList[3]);
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "postProcLog failed");
        }
    }
    // 7. Harmo curve.
    else if (curveType.Compare(_T("HarmoOpr")) == 0) {
        errHandler = pLPE.postProcHarmoS(
            pfParamList[0], pfParamList[1], pfParamList[2], pfParamList[3],
            pfParamList[4], pfParamList[5], pfParamList[6]);
        if (errHandler) {
            HEDYERRHANDLER_errLogFile(
                INNER_ERR, "postProcHarmo failed");
        }
    }
    // 8. Unknown keyword.
    else {
        errHandler = PARAM_ERR;
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "Incorrect postProc operator!");
        // Adjacent literals are concatenated into ONE message. A comma between
        // them would pass the second half as an extra argument that the
        // message (which has no conversion specifier) never prints.
        HEDYERRHANDLER_errLogFile(
            PARAM_ERR, "LinearOpr, SigmoidOpr, AdaptSigmoidOpr,"
                       " ModSigmoidOpr, LogOpr, HarmoOpr are valid!");
    }
    if (errHandler) {
        pLPE.destroy();
        HEDYERRHANDLER_logFile("Error in function SetPostProcPara");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        // Report parameter errors as PARAM_ERR, consistent with SetDeconPara
        // and SetReconPara; every other failure stays INNER_ERR.
        return (errHandler == PARAM_ERR) ? PARAM_ERR : INNER_ERR;
    }
    return SUCCESS;
}

// Purpose:
//   Copy the enhanced image out of the processing instance and release the
//   instance.
// Parameters:
//   pOutImg  destination buffer; receives pLPE.nDataLen_ WORD values
// Returns:
//   SUCCESS on success; PARAM_ERR when pOutImg is NULL while there is data
//   to copy.
ERRTYPE LPEALGORI_API GetEnImg(PWORD pOutImg)
{
    const INT dataLen = pLPE.nDataLen_;
    if (pOutImg == NULL && dataLen > 0) {
        pLPE.destroy();
        HEDYERRHANDLER_errLogFile(PARAM_ERR, "Output image buffer is NULL!");
        HEDYERRHANDLER_logFile("Error in function GetEnImg");
        HEDYERRHANDLER_logFile("Mammography enhancement failed.");
        HEDYERRHANDLER_logFile(
            "================= HEDY_MAMMO Module ended. =================\n\n");
        return PARAM_ERR;
    }

    // Contiguous per-thread chunks: a chunk size of 1 would make neighbouring
    // threads write adjacent 16-bit elements of the same cache line.
    // NOTE(review): values outside the WORD range are not clamped before the
    // conversion - confirm the post-processing step guarantees the range.
    #pragma omp parallel for schedule(static)
    for (INT idx = 0; idx < dataLen; ++idx) {
        pOutImg[idx] = WORD(pLPE.pfInData_[idx]);
    }

    pLPE.destroy();
    HEDYERRHANDLER_logFile("Enhancement Succeed!");
    HEDYERRHANDLER_logFile(
        "================= HEDY_MAMMO Module ended. =================\n\n");
    return SUCCESS;
}
08-05
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

matianlongg

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值