# PyCUDA矩阵乘法

10 篇文章 2 订阅
import numpy as np
from pycuda import driver, gpuarray
from pycuda.compiler import SourceModule
import pycuda.autoinit
# Edge length of the square matrices; also substituted into the CUDA source.
MATRIX_SIZE = 3

# CUDA C template for a square matrix product d_c = d_a * d_b on row-major
# float arrays. Every %(MATRIX_SIZE)s placeholder is replaced with the
# Python constant before compilation, so the size is a compile-time literal.
matrix_mul_kernel = """
__global__ void Matrix_Mul_Kernel(float *d_a, float *d_b, float *d_c)
{
    // Column (tx) and row (ty) of the output element computed by this thread.
    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // Threads that fall outside the matrix have nothing to compute.
    if (tx >= %(MATRIX_SIZE)s || ty >= %(MATRIX_SIZE)s)
        return;

    float value = 0;

    // Dot product of row ty of d_a with column tx of d_b.
    for (int i = 0; i < %(MATRIX_SIZE)s; ++i)
    {
        float d_a_element = d_a[ty * %(MATRIX_SIZE)s + i];
        float d_b_element = d_b[i * %(MATRIX_SIZE)s + tx];
        value += d_a_element * d_b_element;
    }

    d_c[ty * %(MATRIX_SIZE)s + tx] = value;
}
"""

# Final CUDA source with the matrix size filled in.
matrix_mul = matrix_mul_kernel % {'MATRIX_SIZE': MATRIX_SIZE}

# Compile the CUDA source into a module that can be loaded on the device.
mod = SourceModule(matrix_mul)

# Random host matrices with integer values in [1, 5), stored as float32 to
# match the kernel's float* parameters.
h_a = np.random.randint(1, 5, (MATRIX_SIZE, MATRIX_SIZE)).astype(np.float32)
h_b = np.random.randint(1, 5, (MATRIX_SIZE, MATRIX_SIZE)).astype(np.float32)

# compute on the CPU to verify GPU computation
h_c_cpu = np.dot(h_a, h_b)

# Copy the inputs to device memory.
d_a = gpuarray.to_gpu(h_a)
d_b = gpuarray.to_gpu(h_b)

# Uninitialised device buffer that receives the product.
d_c_gpu = gpuarray.empty((MATRIX_SIZE, MATRIX_SIZE), np.float32)

matrixmul = mod.get_function("Matrix_Mul_Kernel")

# Launch one block with one thread per output element.
matrixmul(d_a, d_b, d_c_gpu, block=(MATRIX_SIZE, MATRIX_SIZE, 1))

# Compare element-wise. The previous `a.all() == b.all()` only compared two
# booleans, so it could report success for a wrong result.
if np.allclose(h_c_cpu, d_c_gpu.get()):
    print("\n\nThe computed matrix multiplication is correct")


• 0
点赞
• 0
收藏
觉得还不错? 一键收藏
• 打赏
• 0
评论
08-28
04-05 37
06-07 4303
12-04 3587
04-05 93
05-12 1010
08-01 2020
12-24
12-24
04-09 4万+
09-30 2762
01-03 1946
12-03 568

### “相关推荐”对你有帮助么？

• 非常没帮助
• 没帮助
• 一般
• 有帮助
• 非常有帮助

¥1 ¥2 ¥4 ¥6 ¥10 ¥20

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。