矩阵的加法(此处以一维数组的逐元素相加为例)
-
导入必要的库
import numpy as np import pycuda.autoinit import pycuda.driver as cuda from pycuda.compiler import SourceModule
-
编写核函数。注意:三引号内的代码是 CUDA C/C++ 语法,而不是 Python 语法。
# Compile the CUDA C/C++ kernel source below with PyCUDA's SourceModule.
# `gpu_add` adds the first N elements of x into y in place
# (y[i] = x[i] + y[i]).
#
# The kernel uses a grid-stride loop: each thread starts at its global index
# and advances by the total number of threads in the grid, so every element
# below N is visited regardless of the launch configuration.
#
# Index and stride are size_t to match N; this avoids a signed/unsigned
# comparison against N and int overflow when N exceeds INT_MAX.
mod = SourceModule("""
__global__ void gpu_add(float* x, float* y, size_t N)
{
    size_t index = threadIdx.x + (size_t)blockDim.x * blockIdx.x;
    size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t i = index; i < N; i += stride)
    {
        y[i] = x[i] + y[i];
    }
}
""")
-
编写测试函数
def test(N): a = np.empty(N, dtype=np.float32) a[:] = 10.0 b = np.empty(N, dtype=np.float32