CUDA测试程序运行错误
Code
代码
# CUDA smoke test: add two float32 vectors with a Numba-vectorized function
# and report how long the call took.
import numpy as np
from timeit import default_timer as timer
from numba import vectorize


@vectorize(["float32(float32, float32)"], target='cuda')
def vectorAdd(a, b):
    """Return ``a + b`` for a single pair of float32 scalars.

    The ``@vectorize`` decorator (signature ``float32(float32, float32)``,
    ``target='cuda'``) turns this scalar function into an element-wise
    function that ``main`` calls on whole arrays.
    """
    return a + b


def main():
    """Add two all-ones float32 vectors, print both ends of the result and the elapsed time."""
    N = 320000

    A = np.ones(N, dtype=np.float32)
    B = np.ones(N, dtype=np.float32)
    # The result array is whatever vectorAdd returns, so no output buffer is
    # pre-allocated here (the original np.zeros buffer was overwritten unused).

    # Time only the vectorAdd call itself.
    # NOTE(review): presumably this includes any host<->device copies done
    # inside the call — confirm against the Numba documentation.
    start = timer()
    C = vectorAdd(A, B)
    vectorAdd_time = timer() - start

    print("c[:5] = " + str(C[:5]))
    print("c[-5:] = " + str(C[-5:]))

    print("vectorAdd took %f seconds " % vectorAdd_time)


if __name__ == '__main__':
    main()
Question
相关BUG查询网址
Some Methods
以下参考网址
打开对应文件libs.py、nvvm.py:
def get_libdevice(arch):
    """Locate a libdevice bitcode file for the given architecture string.

    The search directory is taken from the ``NUMBAPRO_LIBDEVICE`` environment
    variable, falling back to ``NUMBAPRO_CUDALIB`` when the former is unset or
    empty. File names are matched against ``libdevice.<arch>[.<digits>...].bc``.

    Returns ``max()`` of the matches reported by ``find_file``, or ``None``
    when there are none.
    """
    search_dir = os.environ.get('NUMBAPRO_LIBDEVICE')
    if not search_dir:
        search_dir = os.environ.get('NUMBAPRO_CUDALIB')

    # ``arch`` is interpolated into the pattern as-is (it is not regex-escaped).
    name_regex = re.compile(r'libdevice\.%s(\.\d+)*\.bc$' % arch)
    matches = find_file(name_regex, search_dir)
    if not matches:
        return None
    # NOTE(review): max() uses plain Python ordering of the candidates;
    # presumably these are path strings — confirm against find_file.
    return max(matches)


def get_cudalib(lib, platform=None):
    """Locate a CUDA library by name.

    For ``lib == 'nvvm'`` a non-empty ``NUMBAPRO_NVVM`` environment variable
    is returned directly, without any search. Otherwise ``find_lib`` is asked
    to search the directory named by ``NUMBAPRO_CUDALIB`` (``None`` if unset).

    Returns ``max()`` of the matches reported by ``find_lib``, or ``None``
    when there are none.
    """
    if lib == 'nvvm':
        nvvm_override = os.environ.get('NUMBAPRO_NVVM')
        if nvvm_override:
            return nvvm_override

    search_dir = os.environ.get('NUMBAPRO_CUDALIB')
    matches = find_lib(lib, search_dir, platform)
    if not matches:
        return None
    return max(matches)
加入对应环境变量后,将 D:\Anaconda3\pkgs\cudatoolkit-9.0-1\DLLs(当前电脑CUDA的安装路径)目录下的 libdevice.10.bc 文件复制一份,并重命名为 libdevice.computeXX.10.bc(当前为 libdevice.compute50.10.bc),放在当前文件夹下。配置结束后,运行结果如下:
告警相关:
def _compute_thread_per_block(kernel, tpb): if tpb != 0: return tpb else: try: tpb = kernel.autotune.best() except ValueError: warnings.warn('Could not autotune, using default tpb of 128') tpb = 128 return tpb