我服了,最近在学习cuda,使用了pycuda,照着 <<Hands-On-GPU-Programming-with-Python-and-CUDA>> 运行例子,在第七章需要使用scikit-cuda的时候遇到了一揽子的问题,记录如下:
1. pip install skcuda 不识别
名字错了,不是skcuda,全名叫 scikit-cuda , 需要使用 pip install scikit-cuda
2. 遇到了 OSError: CUDA runtime library not found
提示如下
from skcuda import cublas
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cublas.py", line 22, in <module>
from . import cuda
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cuda.py", line 9, in <module>
from .cudart import *
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cudart.py", line 41, in <module>
raise OSError('CUDA runtime library not found')
OSError: CUDA runtime library not found
按照错误的提示,这里我们直接跳到 "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cudart.py" 的41行,代码如下:
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcudart_libname_list in order and keep the
# first library that loads.
_libcudart = None
# print(_libcudart_libname_list)
for _libcudart_libname in _libcudart_libname_list:
    try:
        # windll is used on Windows, cdll on every other platform.
        if sys.platform == 'win32':
            _libcudart = ctypes.windll.LoadLibrary(_libcudart_libname)
        else:
            _libcudart = ctypes.cdll.LoadLibrary(_libcudart_libname)
    except OSError:
        # Debug output: report the candidate that failed, then try the next.
        print("os error at", _libcudart_libname)
        pass
    else:
        # This candidate loaded; stop searching.
        break
# None of the candidates could be loaded.
if _libcudart == None:
    raise OSError('CUDA runtime library not found')
我通过打印 _libcudart_libname_list,可以看到 ['cudart.dll', 'cudart64_101.dll', 'cudart64_100.dll', 'cudart64_92.dll', 'cudart64_91.dll', 'cudart64_90.dll', 'cudart64_80.dll', 'cudart64_75.dll', 'cudart64_70.dll', 'cudart64_65.dll', 'cudart64_60.dll', 'cudart64_55.dll', 'cudart64_50.dll', 'cudart64_40.dll']。这里的错误就很明显了:我使用的cuda环境是cuda12,而这里运行时库的命名规则是 cudart64_版本号.dll,列表里最高只到 101(即 10.1);cuda12 的运行时库叫做 cudart64_12.dll,不在列表里,也就是说我的版本太新了,所以找不到。网上有各种邪门技巧,有把这个文件重命名的,有添加各种环境变量的,我为了简单,采用了最直接的做法:改代码。
上述代码修改后如下:
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcudart_libname_list in order and keep the
# first library that loads.
_libcudart = None
# print(_libcudart_libname_list)
for _libcudart_libname in _libcudart_libname_list:
    try:
        if sys.platform == 'win32':
            _libcudart = ctypes.windll.LoadLibrary(_libcudart_libname)
        else:
            _libcudart = ctypes.cdll.LoadLibrary(_libcudart_libname)
    except OSError:
        # This candidate is not available; try the next one.
        pass
    else:
        break
# ----------------------------------------------
# Fallback for CUDA 12 on Windows, whose runtime DLL (cudart64_12.dll) is not
# in the candidate list. The DLL is loaded by absolute path so the lookup does
# not depend on the DLL search path. The install directory is taken from the
# CUDA_PATH environment variable when it is set, otherwise from the v12.2
# default location.
if _libcudart is None and sys.platform == 'win32':
    import os
    _cuda_home = os.environ.get(
        'CUDA_PATH',
        'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2')
    try:
        # windll, to match the loader used for the candidates above.
        _libcudart = ctypes.windll.LoadLibrary(
            os.path.join(_cuda_home, 'bin', 'cudart64_12.dll'))
    except OSError:
        # LoadLibrary raises instead of returning None; swallow the error so
        # the descriptive OSError below is the one the user sees.
        pass
# ----------------------------------------------
if _libcudart is None:
    raise OSError('CUDA runtime library not found')
附,可能有人说硬编码LoadLibrary这操作太草台班子,我给你们看下它源码就知道了,世界其实就是一个草台班子……
# Load library:
# Build the list of candidate cuBLAS library file names for the current
# platform. Each list starts with the unversioned name, followed by one name
# per entry of the matching version list.
# Suffixes appended to 'libcublas.so.' on Linux.
_linux_version_list = [10.1, 10.0, 9.2, 9.1, 9.0, 8.0, 7.5, 7.0, 6.5, 6.0, 5.5, 5.0, 4.0]
# Suffixes substituted into 'cublas64_%s.dll' / 'cublas32_%s.dll' on Windows.
_win32_version_list = [10, 100, 92, 91, 90, 80, 75, 70, 65, 60, 55, 50, 40]
if 'linux' in sys.platform:
    _libcublas_libname_list = ['libcublas.so'] + \
        ['libcublas.so.%s' % v for v in _linux_version_list]
elif sys.platform == 'darwin':
    # macOS: only the unversioned name is tried.
    _libcublas_libname_list = ['libcublas.dylib']
elif sys.platform == 'win32':
    # sys.maxsize selects between the 64-bit and 32-bit DLL names.
    if sys.maxsize > 2**32:
        _libcublas_libname_list = ['cublas.dll'] + \
            ['cublas64_%s.dll' % v for v in _win32_version_list]
    else:
        _libcublas_libname_list = ['cublas.dll'] + \
            ['cublas32_%s.dll' % v for v in _win32_version_list]
else:
    raise RuntimeError('unsupported platform')
所以,它也是一个一个硬编码的,只不过没编我的版本而已,所以我加上也就是顺理成章的事情……
3. 遇到了 OSError: cublas library not found
一样的事情,首先提示如下
['cudart.dll', 'cudart64_101.dll', 'cudart64_100.dll', 'cudart64_92.dll', 'cudart64_91.dll', 'cudart64_90.dll', 'cudart64_80.dll', 'cudart64_75.dll', 'cudart64_70.dll', 'cudart64_65.dll', 'cudart64_60.dll', 'cudart64_55.dll', 'cudart64_50.dll', 'cudart64_40.dll']
...
from skcuda import cublas
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cublas.py", line 56, in <module>
raise OSError('cublas library not found')
OSError: cublas library not found
老套路,我们去看一下那个代码
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcublas_libname_list in order and keep the
# first library that loads.
_libcublas = None
for _libcublas_libname in _libcublas_libname_list:
    try:
        # windll is used on Windows, cdll on every other platform.
        if sys.platform == 'win32':
            _libcublas = ctypes.windll.LoadLibrary(_libcublas_libname)
        else:
            _libcublas = ctypes.cdll.LoadLibrary(_libcublas_libname)
    except OSError:
        # This candidate is not available; try the next one.
        pass
    else:
        # This candidate loaded; stop searching.
        break
# None of the candidates could be loaded.
if _libcublas == None:
    raise OSError('cublas library not found')
所以,也是一样的原因,版本太新了,更新没跟上。
修改后代码如下
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcublas_libname_list in order and keep the
# first library that loads.
_libcublas = None
for _libcublas_libname in _libcublas_libname_list:
    try:
        if sys.platform == 'win32':
            _libcublas = ctypes.windll.LoadLibrary(_libcublas_libname)
        else:
            _libcublas = ctypes.cdll.LoadLibrary(_libcublas_libname)
    except OSError:
        # This candidate is not available; try the next one.
        pass
    else:
        break
# -------------------------------------------------------
# Fallback for CUDA 12 on Windows, whose cuBLAS DLL (cublas64_12.dll) is not
# in the candidate list. The DLL is loaded by absolute path so the lookup does
# not depend on the DLL search path. The install directory is taken from the
# CUDA_PATH environment variable when it is set, otherwise from the v12.2
# default location.
if _libcublas is None and sys.platform == 'win32':
    import os
    _cuda_home = os.environ.get(
        'CUDA_PATH',
        'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2')
    try:
        # windll, to match the loader used for the candidates above.
        _libcublas = ctypes.windll.LoadLibrary(
            os.path.join(_cuda_home, 'bin', 'cublas64_12.dll'))
    except OSError:
        # LoadLibrary raises instead of returning None; swallow the error so
        # the descriptive OSError below is the one the user sees.
        pass
# -------------------------------------------------------
if _libcublas is None:
    raise OSError('cublas library not found')
如果你是其他cuda版本,大致路径也是类似的,搜索一下就能找到了。
4. 遇到了 OSError: cufft library not found
提示如下
from skcuda import fft
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\fft.py", line 14, in <module>
from . import cufft
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cufft.py", line 46, in <module>
raise OSError('cufft library not found')
一样的事情,跳转到 "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\cufft.py" 的第46行
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcufft_libname_list in order and keep the
# first library that loads.
_libcufft = None
for _libcufft_libname in _libcufft_libname_list:
    try:
        # windll is used on Windows, cdll on every other platform.
        if sys.platform == 'win32':
            _libcufft = ctypes.windll.LoadLibrary(_libcufft_libname)
        else:
            _libcufft = ctypes.cdll.LoadLibrary(_libcufft_libname)
    except OSError:
        # This candidate is not available; try the next one.
        pass
    else:
        # This candidate loaded; stop searching.
        break
# None of the candidates could be loaded.
if _libcufft == None:
    raise OSError('cufft library not found')
这次比较神奇,本地cuda12自带的cufft居然是11版本的(文件名是 cufft64_11.dll)。这是因为CUDA工具包里各个库的版本号是独立编号的,并不一定和工具包版本一致。没关系,11就11。修改后代码如下
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcufft_libname_list in order and keep the
# first library that loads.
_libcufft = None
for _libcufft_libname in _libcufft_libname_list:
    try:
        if sys.platform == 'win32':
            _libcufft = ctypes.windll.LoadLibrary(_libcufft_libname)
        else:
            _libcufft = ctypes.cdll.LoadLibrary(_libcufft_libname)
    except OSError:
        # This candidate is not available; try the next one.
        pass
    else:
        break
# ---------------------------------
# Fallback for CUDA 12 on Windows, whose cuFFT DLL is named cufft64_11.dll and
# is not in the candidate list. The DLL is loaded by absolute path so the
# lookup does not depend on the DLL search path. The install directory is
# taken from the CUDA_PATH environment variable when it is set, otherwise from
# the v12.2 default location.
if _libcufft is None and sys.platform == 'win32':
    import os
    _cuda_home = os.environ.get(
        'CUDA_PATH',
        'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2')
    try:
        # windll, to match the loader used for the candidates above.
        _libcufft = ctypes.windll.LoadLibrary(
            os.path.join(_cuda_home, 'bin', 'cufft64_11.dll'))
    except OSError:
        # LoadLibrary raises instead of returning None; swallow the error so
        # the descriptive OSError below is the one the user sees.
        pass
# ---------------------------------
if _libcufft is None:
    raise OSError('cufft library not found')
5. 遇到了 AttributeError: module 'numpy' has no attribute 'typeDict'. Did you mean: 'sctypeDict'?
提示如下
from skcuda import fft
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\fft.py", line 20, in <module>
from . import misc
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\misc.py", line 637, in <module>
num_types = [np.typeDict[t] for t in \
^^^^^^^^^^^
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\numpy\__init__.py", line 347, in __getattr__
raise AttributeError("module {!r} has no attribute "
AttributeError: module 'numpy' has no attribute 'typeDict'. Did you mean: 'sctypeDict'?
这是numpy版本不兼容导致的:typeDict 只是 sctypeDict 的一个旧别名,在较新版本的NumPy中已经被移除,应该使用numpy.sctypeDict代替。有一种说法是将numpy版本降下来,这不是开历史倒车么,想了想,我还是改代码吧。
看提示知道,跳转到"C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\misc.py" 的637行,修改后代码如下
# List of available numerical types provided by numpy.
# The lookup table used to be spelled np.typeDict; newer NumPy releases only
# provide it as np.sctypeDict, so that name is used here.
num_types = [
    np.sctypeDict[type_code]
    for type_code in np.typecodes['AllInteger'] + np.typecodes['AllFloat']
]
6. 遇到了 OSError: cusolver library not found
好的,我直接贴修改后的代码
# Print understandable error message when library cannot be found:
# Try every candidate name in _libcusolver_libname_list in order and keep the
# first library that loads.
_libcusolver = None
for _libcusolver_libname in _libcusolver_libname_list:
    try:
        if sys.platform == 'win32':
            _libcusolver = ctypes.windll.LoadLibrary(_libcusolver_libname)
        else:
            _libcusolver = ctypes.cdll.LoadLibrary(_libcusolver_libname)
    except OSError:
        # This candidate is not available; try the next one.
        pass
    else:
        break
# --------------------------------------
# Fallback for CUDA 12 on Windows, whose cuSOLVER DLL is named
# cusolver64_11.dll and is not in the candidate list. The DLL is loaded by
# absolute path so the lookup does not depend on the DLL search path. The
# install directory is taken from the CUDA_PATH environment variable when it
# is set, otherwise from the v12.2 default location.
if _libcusolver is None and sys.platform == 'win32':
    import os
    _cuda_home = os.environ.get(
        'CUDA_PATH',
        'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.2')
    try:
        # windll, to match the loader used for the candidates above.
        _libcusolver = ctypes.windll.LoadLibrary(
            os.path.join(_cuda_home, 'bin', 'cusolver64_11.dll'))
    except OSError:
        # LoadLibrary raises instead of returning None; swallow the error so
        # the descriptive OSError below is the one the user sees.
        pass
# --------------------------------------
if _libcusolver is None:
    raise OSError('cusolver library not found')
7. 遇到了module 'numpy' has no attribute 'float'
np.float从1.24起被删除。所用的代码是依赖于旧版本的Numpy。
只需将 numpy 的别名替换为内置的 Python 类型float就可以解决问题。
即 np.float -> float
8. 遇到 pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid resource handle
这个困扰了比较久,最后看到了一个类似的例子 https://github.com/inducer/pycuda/discussions/406
应该是调用fft的地方需要有cuda的上下文才行,提示如下
fft.ifft(y_fft, out_gpu, inverse_plan, scale=True)
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\fft.py", line 304, in ifft
_fft(x_gpu, y_gpu, plan, cufft.CUFFT_INVERSE, y_gpu.size/plan.batch)
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\skcuda\fft.py", line 208, in _fft
func(y_gpu.dtype.type(scale), y_gpu)
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\pycuda\elementwise.py", line 286, in __call__
func.prepared_async_call(grid, block, stream, *invocation_args)
File "C:\Users\hbzzh\AppData\Local\Programs\Python\Python312\Lib\site-packages\pycuda\driver.py", line 604, in function_prepared_async_call
func._set_block_shape(*block)
pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid resource handle
调用端需要修改,对应的代码如下
# ----------------------------------
import pycuda.driver as drv
# ----------------------------------
def cufft_conv(x , y):
    """FFT-based convolution of two equally-shaped arrays on the GPU.

    Both inputs are converted to complex64, transformed with fft.fft,
    multiplied element-wise with linalg.multiply, and the product is
    transformed back with fft.ifft (scale=True).

    Parameters:
        x, y: array objects providing ``.shape`` and ``.astype``
            (presumably NumPy arrays -- confirm against the caller).

    Returns:
        The inverse-transformed result copied back to the host via
        ``out_gpu.get()``, or -1 when the two shapes differ (kept as a
        sentinel so existing callers keep working).
    """
    # Reject mismatched inputs before touching the GPU at all.
    if (x.shape != y.shape):
        return -1
    # ----------------------------------
    drv.init()
    pycuda_ctx = drv.Device(0).retain_primary_context()
    # ----------------------------------
    x = x.astype(np.complex64)
    y = y.astype(np.complex64)
    plan = fft.Plan(x.shape, np.complex64, np.complex64)
    inverse_plan = fft.Plan(x.shape, np.complex64, np.complex64)
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.to_gpu(y)
    x_fft = gpuarray.empty_like(x_gpu, dtype=np.complex64)
    y_fft = gpuarray.empty_like(y_gpu, dtype=np.complex64)
    out_gpu = gpuarray.empty_like(x_gpu, dtype=np.complex64)
    fft.fft(x_gpu, x_fft, plan)
    fft.fft(y_gpu, y_fft, plan)
    # NOTE(review): overwrite=True is relied on to leave the product in y_fft,
    # which is what gets inverse-transformed below -- confirm against the
    # scikit-cuda documentation.
    linalg.multiply(x_fft, y_fft, overwrite=True)
    # ----------------------------------
    # Make the primary context current for the scaled inverse transform; this
    # is the workaround for "cuFuncSetBlockShape failed: invalid resource
    # handle".
    pycuda_ctx.push()
    # ----------------------------------
    try:
        fft.ifft(y_fft, out_gpu, inverse_plan, scale=True)
    finally:
        # Always undo the push, even if ifft raises, so the context stack is
        # left as it was found.
        # ----------------------------------
        pycuda_ctx.pop()
        # ----------------------------------
    conv_out = out_gpu.get()
    return conv_out